Peano
Loading...
Searching...
No Matches
Variant1.cpp
Go to the documentation of this file.
1
27
28#include "CellFaceData.h"
29#include "exahype2/CellData.h"
30#include "kernels/AderSolver/BufferSizes.h"
31#include "kernels/AderSolver/FaceIntegral.h"
32#include "kernels/AderSolver/FusedSpaceTimePredictorVolumeIntegral.h"
33#include "kernels/AderSolver/MaxScaledEigenvalue.h"
34#include "kernels/AderSolver/RiemannSolver.h"
35#include "repositories/SolverRepository.h"
36#include "Utils.h"
37#include "Variants.h"
38
39#include <cstring>
40
41tarch::logging::Log variant1::_log("variants1::");
42
44
45inline void initialTask(
46 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
48 const int cellId,
49 const int faceId,
50 tarch::timing::Measurement& measurement
51) {
52
53 // Assume that the faces for a given cell are one after another
54 // in typical Peano order, e.g. left, lower, right, upper
55 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
56 myFaceData->QIn[faceId][0],
57 myFaceData->QIn[faceId][1],
58#if DIMENSIONS == 3
59 myFaceData->QIn[faceId][2],
60#endif
61 myFaceData->QIn[faceId][DIMENSIONS + 0],
62 myFaceData->QIn[faceId][DIMENSIONS + 1]
63#if DIMENSIONS == 3
64 ,
65 myFaceData->QIn[faceId][DIMENSIONS + 2]
66#endif
67 };
68
69 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
70 myFaceData->QOut[faceId][0],
71 myFaceData->QOut[faceId][1],
72#if DIMENSIONS == 3
73 myFaceData->QOut[faceId][2],
74#endif
75 myFaceData->QOut[faceId][DIMENSIONS + 0],
76 myFaceData->QOut[faceId][DIMENSIONS + 1]
77#if DIMENSIONS == 3
78 ,
79 myFaceData->QOut[faceId][DIMENSIONS + 2]
80#endif
81 };
82
83 tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
84
85 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
86 repositories::instanceOfAderSolver,
87 lQhbnd,
88 lFhbnd,
89 myCellData->QIn[cellId],
90 myCellData->cellCentre[cellId],
91 myCellData->cellSize[cellId],
92 myCellData->t,
93 myCellData->dt
94 );
95
96 watchKernelCompute.stop();
97 measurement.setValue(watchKernelCompute.getCalendarTime());
98}
99
100inline void firstTask(
102 const int leftFaceId,
103 const int rightFaceId,
104 const int faceDirection,
105 tarch::timing::Measurement& measurement
106) {
107
108 tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
109
110 // We are assuming that there are no boundaries okay
111 // In a real ExaHyPE application boundaries will exist
112 // but we can still assume that the corresponding faces
113 // will have received data
114 kernels::AderSolver::riemannSolver<SolverPrecision>(
115 repositories::instanceOfAderSolver,
116 myFaceData->QOut[leftFaceId][faceDirection + DIMENSIONS], // lFhbnd[d+DIMENSIONS],
117 myFaceData->QOut[rightFaceId][faceDirection],
118 myFaceData->QIn[leftFaceId][faceDirection + DIMENSIONS], // lFhbnd[d+DIMENSIONS],
119 myFaceData->QIn[rightFaceId][faceDirection],
120 0.5 * (myFaceData->t[leftFaceId] + myFaceData->t[rightFaceId]),
121 0.5 * (myFaceData->dt[leftFaceId] + myFaceData->dt[rightFaceId]),
122 0.5 * (myFaceData->cellCentre[leftFaceId] + myFaceData->cellCentre[rightFaceId]),
123 0.5 * (myFaceData->cellSize[leftFaceId] + myFaceData->cellSize[rightFaceId]),
124 faceDirection,
125 false,
126 0
127 );
128
129 watchKernelCompute.stop();
130 measurement.setValue(watchKernelCompute.getCalendarTime());
131}
132
133inline void secondTask(
134 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
136 const int cellId,
137 const int faceId,
138 tarch::timing::Measurement& measurement
139) {
140
141 // Assume that the faces for a given cell are one after another
142 // in typical Peano order, e.g. left, lower, right, upper
143 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
144 myFaceData->QIn[faceId][0],
145 myFaceData->QIn[faceId][1],
146#if DIMENSIONS == 3
147 myFaceData->QIn[faceId][2],
148#endif
149 myFaceData->QIn[faceId][DIMENSIONS + 0],
150 myFaceData->QIn[faceId][DIMENSIONS + 1]
151#if DIMENSIONS == 3
152 ,
153 myFaceData->QIn[faceId][DIMENSIONS + 2]
154#endif
155 };
156
157 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
158 myFaceData->QOut[faceId][0],
159 myFaceData->QOut[faceId][1],
160#if DIMENSIONS == 3
161 myFaceData->QOut[faceId][2],
162#endif
163 myFaceData->QOut[faceId][DIMENSIONS + 0],
164 myFaceData->QOut[faceId][DIMENSIONS + 1]
165#if DIMENSIONS == 3
166 ,
167 myFaceData->QOut[faceId][DIMENSIONS + 2]
168#endif
169 };
170
171 tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
172
173 for (int d = 0; d < DIMENSIONS; d++) {
174 const int direction = d;
175
176 const double inverseDxDirection = 1.0 / myCellData->cellSize[cellId][d];
177 // Negative face
178 kernels::AderSolver::faceIntegral(
179 myCellData->QIn[cellId],
180 myFaceData->QOut[faceId][d],
181 direction,
182 0,
183 inverseDxDirection,
184 myCellData->dt
185 );
186
187 // Positive face
188 kernels::AderSolver::faceIntegral(
189 myCellData->QIn[cellId],
190 myFaceData->QOut[faceId][d + DIMENSIONS],
191 direction,
192 1,
193 inverseDxDirection,
194 myCellData->dt
195 );
196
197 } // for d
198
199 myCellData->maxEigenvalue[cellId] = kernels::AderSolver::maxScaledEigenvalue(
200 repositories::instanceOfAderSolver,
201 myCellData->QIn[cellId],
202 myCellData->cellCentre[cellId],
203 myCellData->cellSize[cellId],
204 myCellData->t,
205 myCellData->dt
206 );
207
208 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
209 repositories::instanceOfAderSolver,
210 lQhbnd,
211 lFhbnd,
212 myCellData->QIn[cellId],
213 myCellData->cellCentre[cellId],
214 myCellData->cellSize[cellId],
215 myCellData->t,
216 myCellData->dt
217 );
218
219 watchKernelCompute.stop();
220 measurement.setValue(watchKernelCompute.getCalendarTime());
221}
222
223
225 int numberOfCells,
226 double timeStamp,
227 double timeStepSize,
228 const tarch::la::Vector<DIMENSIONS, double> cellCenter,
229 const tarch::la::Vector<DIMENSIONS, double> cellSize
230) {
231
232 tarch::timing::Measurement timingComputeKernel;
233
234 exahype2::CellData<SolverPrecision, SolverPrecision> cellData(numberOfCells);
236
237 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
238 cellData.QIn[cellIndex] = tarch::allocateMemory<SolverPrecision>(
240 tarch::MemoryLocation::Heap
241 );
242 cellData.t = timeStamp;
243 cellData.dt = timeStepSize;
244 cellData.QOut[cellIndex] = nullptr; // tarch::allocateMemory<SolverPrecision>(NumberOfOutputEntriesPerCell,
245 // tarch::MemoryLocation::Heap);
246 cellData.cellCentre[cellIndex] = cellCenter;
247 cellData.cellSize[cellIndex] = cellSize;
248 cellData.maxEigenvalue[cellIndex] = 0.0;
249
250 initInputData(cellData.QIn[cellIndex], cellCenter, cellSize);
251 std::memset(cellData.QOut[cellIndex], 0.0, NumberOfOutputEntriesPerCell * sizeof(SolverPrecision));
252
253 for (int i = 0; i < 2 * DIMENSIONS; i++) {
254 cellFaceData.QIn[cellIndex][i] = tarch::allocateMemory<SolverPrecision>(
255 kernels::AderSolver::getBndFaceSize(),
256 tarch::MemoryLocation::Heap
257 );
258 cellFaceData.QOut[cellIndex][i] = tarch::allocateMemory<SolverPrecision>(
259 kernels::AderSolver::getBndFluxSize(),
260 tarch::MemoryLocation::Heap
261 );
262 }
263
264 cellFaceData.t[cellIndex] = timeStamp;
265 cellFaceData.dt[cellIndex] = timeStepSize;
266 cellFaceData.cellCentre[cellIndex] = cellCenter;
267 cellFaceData.cellSize[cellIndex] = cellSize;
268
269 std::memset(cellData.QOut[cellIndex], 0.0, NumberOfOutputEntriesPerCell * sizeof(SolverPrecision));
270 }
271
272 int numberOfThreads = 1;
273
274 #if defined(WITH_OPENMP)
275 for (int threadIndex = 0; threadIndex < NumberOfLaunchingThreads.size(); threadIndex++) {
276 numberOfThreads = NumberOfLaunchingThreads[threadIndex];
277 #endif
278
279 // Initial task
280 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
281 initialTask(&cellData, &cellFaceData, cellIndex, cellIndex, timingComputeKernel);
282 }
283
284 timingComputeKernel.erase();
285 tarch::timing::Measurement timingKernelLaunch;
286
287 for (int sample = 0; sample <= NumberOfSamples; sample++) {
288
289 tarch::timing::Watch watchKernelLaunch("::runBenchmarks", "assessKernel(...)", false);
290
291 #if defined(WITH_OPENMP)
292 #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
293 #endif
294 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
295 // We're not actually simulating a real grid, instead
296 // we are making each cell it's own neighbour, as though
297 // it were a whole periodic domain
298 firstTask(&cellFaceData, cellIndex, cellIndex, 0, timingComputeKernel);
299 firstTask(&cellFaceData, cellIndex, cellIndex, 1, timingComputeKernel);
300 }
301 #if defined(WITH_OPENMP)
302 #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
303 #endif
304 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
305 cellData.maxEigenvalue[cellIndex] = 0.0;
306 secondTask(&cellData, &cellFaceData, cellIndex, cellIndex, timingComputeKernel);
307 }
308
309 watchKernelLaunch.stop();
310 timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
311 }
312
313 reportRuntime("variant 1", timingComputeKernel, timingKernelLaunch, numberOfCells, numberOfThreads, _log);
314 // allocateAndStoreOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
315 // validateOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
316
317 #if defined(WITH_OPENMP)
318 }//threadIndex
319 #endif
320
321 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
322 tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
323 tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);
324
325 for (int i = 0; i < 2 * DIMENSIONS; i++) {
326 tarch::freeMemory(cellFaceData.QIn[cellIndex][i], tarch::MemoryLocation::Heap);
327 tarch::freeMemory(cellFaceData.QOut[cellIndex][i], tarch::MemoryLocation::Heap);
328 }
329 }
330}
void secondTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition Variant1.cpp:133
void initialTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition Variant1.cpp:45
void firstTask(exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int leftFaceId, const int rightFaceId, const int faceDirection, tarch::timing::Measurement &measurement)
Definition Variant1.cpp:100
constexpr double timeStamp
const tarch::la::Vector< DIMENSIONS, double > cellCenter
const tarch::la::Vector< DIMENSIONS, double > cellSize
constexpr double timeStepSize
tarch::timing::Measurement timingComputeKernel
constexpr int NumberOfInputEntriesPerCell
Definition Utils.h:14
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.
Definition Utils.h:68
constexpr int NumberOfOutputEntriesPerCell
Definition Utils.h:20
void initInputData(SolverPrecision *Q, const tarch::la::Vector< DIMENSIONS, double > CellCenter, const tarch::la::Vector< DIMENSIONS, double > CellSize)
Set input data.
Definition Utils.h:30
tarch::logging::Log _log
This is variant 1 of the fused kernels.
void runBenchmarks(int numberOfCells, double timeStamp, double timeStepSize, const tarch::la::Vector< DIMENSIONS, double > cellCenter, const tarch::la::Vector< DIMENSIONS, double > cellSize)
Definition Variant1.cpp:224
Represents the faces of one cell, with a total of 2*Dim faces per cell For ADER QIn will contain the ...
tarch::la::Vector< DIMENSIONS, double > * cellSize
tarch::la::Vector< DIMENSIONS, double > * cellCentre
outType *(* QOut)[2 *DIMENSIONS]
Out values.
inType *(* QIn)[2 *DIMENSIONS]
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.