Peano
Loading...
Searching...
No Matches
Variant4.cpp
Go to the documentation of this file.
1
24
25#include "exahype2/CellData.h"
26#include "kernels/AderSolver/BufferSizes.h"
27#include "kernels/AderSolver/FaceIntegral.h"
28#include "kernels/AderSolver/FusedSpaceTimePredictorVolumeIntegral.h"
29#include "kernels/AderSolver/MaxScaledEigenvalue.h"
30#include "kernels/AderSolver/RiemannSolver.h"
31#include "LRFaceData.h"
32#include "repositories/SolverRepository.h"
33#include "Utils.h"
34#include "Variants.h"
35
36#include <cstring>
37
38tarch::logging::Log variant4::_log("variants4::");
39
41
42inline void initialTask(
43 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
45 const int cellId,
46 const int faceId,
47 tarch::timing::Measurement& measurement
48) {
49
50 // Assume that the faces for a given cell are one after another in FaceData
51 // in typical Peano order, e.g. left, lower, right, upper
52 // so faceId is the left, lower (front) face,
53 // faceId+1 is the one after this, and so on.
54 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
55 myFaceData->QIn[faceId + 0][1],
56 myFaceData->QIn[faceId + 1][1],
57#if DIMENSIONS == 3
58 myFaceData->QIn[faceId + 2][1],
59#endif
60 myFaceData->QIn[faceId + DIMENSIONS + 0][0],
61 myFaceData->QIn[faceId + DIMENSIONS + 1][0]
62#if DIMENSIONS == 3
63 ,
64 myFaceData->QIn[faceId + DIMENSIONS + 2][0]
65#endif
66 };
67
68 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
69 myFaceData->QOut[faceId + 0][1],
70 myFaceData->QOut[faceId + 1][1],
71#if DIMENSIONS == 3
72 myFaceData->QOut[faceId + 2][1],
73#endif
74 myFaceData->QOut[faceId + DIMENSIONS + 0][0],
75 myFaceData->QOut[faceId + DIMENSIONS + 1][0]
76#if DIMENSIONS == 3
77 ,
78 myFaceData->QOut[faceId + DIMENSIONS + 2][0]
79#endif
80 };
81
82 tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
83
84 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
85 repositories::instanceOfAderSolver,
86 lQhbnd,
87 lFhbnd,
88 myCellData->QIn[cellId],
89 myCellData->cellCentre[cellId],
90 myCellData->cellSize[cellId],
91 myCellData->t,
92 myCellData->dt
93 );
94
95 watchKernelCompute.stop();
96 measurement.setValue(watchKernelCompute.getCalendarTime());
97}
98
99
101 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
103 const int cellId,
104 const int faceId,
105 tarch::timing::Measurement& measurement
106) {
107
108 // Assume that the faces for a given cell are one after another
109 // in typical Peano order, e.g. left, lower, right, upper
110 // Assume that the faces for a given cell are one after another
111 // in typical Peano order, e.g. left, lower, right, upper
112 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
113 myFaceData->QIn[faceId + 0][1],
114 myFaceData->QIn[faceId + 1][1],
115#if DIMENSIONS == 3
116 myFaceData->QIn[faceId + 2][1],
117#endif
118 myFaceData->QIn[faceId + DIMENSIONS + 0][0],
119 myFaceData->QIn[faceId + DIMENSIONS + 1][0]
120#if DIMENSIONS == 3
121 ,
122 myFaceData->QIn[faceId + DIMENSIONS + 2][0]
123#endif
124 };
125
126 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
127 myFaceData->QOut[faceId + 0][1],
128 myFaceData->QOut[faceId + 1][1],
129#if DIMENSIONS == 3
130 myFaceData->QOut[faceId + 2][1],
131#endif
132 myFaceData->QOut[faceId + DIMENSIONS + 0][0],
133 myFaceData->QOut[faceId + DIMENSIONS + 1][0]
134#if DIMENSIONS == 3
135 ,
136 myFaceData->QOut[faceId + DIMENSIONS + 2][0]
137#endif
138 };
139
140 tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
141
142 for (int d = 0; d < DIMENSIONS; d++) {
143 const int direction = d;
144
145 kernels::AderSolver::riemannSolver<SolverPrecision>(
146 repositories::instanceOfAderSolver,
147 myFaceData->QOut[faceId + d][0],
148 myFaceData->QOut[faceId + d][1],
149 myFaceData->QIn[faceId + d][0],
150 myFaceData->QIn[faceId + d][1],
151 myFaceData->t[faceId + d],
152 myFaceData->dt[faceId + d],
153 myFaceData->faceCentre[faceId + d],
154 myFaceData->faceSize[faceId + d],
155 direction,
156 false,
157 0
158 );
159
160 const double inverseDxDirection = 1.0 / myCellData->cellSize[cellId][d];
161 // Negative face
162 kernels::AderSolver::faceIntegral(
163 myCellData->QIn[cellId],
164 myFaceData->QOut[faceId + d][1],
165 direction,
166 0,
167 inverseDxDirection,
168 myCellData->dt
169 );
170
171 kernels::AderSolver::riemannSolver<SolverPrecision>(
172 repositories::instanceOfAderSolver,
173 myFaceData->QOut[faceId + d + DIMENSIONS][0],
174 myFaceData->QOut[faceId + d + DIMENSIONS][1],
175 myFaceData->QIn[faceId + d + DIMENSIONS][0],
176 myFaceData->QIn[faceId + d + DIMENSIONS][1],
177 myFaceData->t[faceId + d + DIMENSIONS],
178 myFaceData->dt[faceId + d + DIMENSIONS],
179 myFaceData->faceCentre[faceId + d + DIMENSIONS],
180 myFaceData->faceSize[faceId + d + DIMENSIONS],
181 direction,
182 false,
183 0
184 );
185
186 // Positive face
187 kernels::AderSolver::faceIntegral(
188 myCellData->QIn[cellId],
189 myFaceData->QOut[faceId + DIMENSIONS][0],
190 direction,
191 1,
192 inverseDxDirection,
193 myCellData->dt
194 );
195
196 } // for d
197
198 myCellData->maxEigenvalue[cellId] = kernels::AderSolver::maxScaledEigenvalue(
199 repositories::instanceOfAderSolver,
200 myCellData->QIn[cellId],
201 myCellData->cellCentre[cellId],
202 myCellData->cellSize[cellId],
203 myCellData->t,
204 myCellData->dt
205 );
206
207 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
208 repositories::instanceOfAderSolver,
209 lQhbnd,
210 lFhbnd,
211 myCellData->QIn[cellId],
212 myCellData->cellCentre[cellId],
213 myCellData->cellSize[cellId],
214 myCellData->t,
215 myCellData->dt
216 );
217
218 watchKernelCompute.stop();
219 measurement.setValue(watchKernelCompute.getCalendarTime());
220}
221
222
224 int numberOfCells,
225 double timeStamp,
226 double timeStepSize,
227 const tarch::la::Vector<DIMENSIONS, double> cellCenter,
228 const tarch::la::Vector<DIMENSIONS, double> cellSize
229) {
230
231 tarch::timing::Measurement timingComputeKernel;
232
233 exahype2::CellData<SolverPrecision, SolverPrecision> cellData(numberOfCells);
234 exahype2::FaceData<SolverPrecision, SolverPrecision> faceData(2 * DIMENSIONS * numberOfCells);
235
236 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
237 cellData.QIn[cellIndex] = tarch::allocateMemory<SolverPrecision>(
239 tarch::MemoryLocation::Heap
240 );
241 cellData.t = timeStamp;
242 cellData.dt = timeStepSize;
243 cellData.QOut[cellIndex] = nullptr;
244 cellData.cellCentre[cellIndex] = cellCenter;
245 cellData.cellSize[cellIndex] = cellSize;
246 cellData.maxEigenvalue[cellIndex] = 0.0;
247
248 initInputData(cellData.QIn[cellIndex], cellCenter, cellSize);
249 }
250
251 // We need one instance of face per face of a cell, e.g.
252 // 2*DIMENSIONS*numberOfCells faces.
253 // Here we just number them in the Peano order for each cell,
254 // so face (2*DIMENSIONS*i+j) will be the jth face of the ith cell
255 for (int faceIndex = 0; faceIndex < 2 * DIMENSIONS * numberOfCells; faceIndex++) {
256 // Allocating data for the left and right sides of a given face
257 faceData.QIn[faceIndex][0] = tarch::allocateMemory<SolverPrecision>(
258 kernels::AderSolver::getBndFaceSize(),
259 tarch::MemoryLocation::Heap
260 );
261 faceData.QIn[faceIndex][1] = tarch::allocateMemory<SolverPrecision>(
262 kernels::AderSolver::getBndFaceSize(),
263 tarch::MemoryLocation::Heap
264 );
265 faceData.QOut[faceIndex][0] = tarch::allocateMemory<SolverPrecision>(
266 kernels::AderSolver::getBndFluxSize(),
267 tarch::MemoryLocation::Heap
268 );
269 faceData.QOut[faceIndex][1] = tarch::allocateMemory<SolverPrecision>(
270 kernels::AderSolver::getBndFluxSize(),
271 tarch::MemoryLocation::Heap
272 );
273
274 faceData.t[faceIndex] = timeStamp;
275 faceData.dt[faceIndex] = timeStepSize;
276 faceData.faceCentre[faceIndex] = cellCenter;
277 faceData.faceSize[faceIndex] = cellSize;
278 }
279
280 int numberOfThreads = 1;
281
282 #if defined(WITH_OPENMP)
283 for (int threadIndex = 0; threadIndex < NumberOfLaunchingThreads.size(); threadIndex++) {
284 numberOfThreads = NumberOfLaunchingThreads[threadIndex];
285 #endif
286
287 // Initial task
288 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
290 &cellData,
291 &faceData,
292 cellIndex,
293 cellIndex * 2 * DIMENSIONS, // just passing the first of a cells faces, it can get the others by pointer
294 // arithmetic
296 );
297 }
298
299 timingComputeKernel.erase();
300 tarch::timing::Measurement timingKernelLaunch;
301
302 for (int sample = 0; sample <= NumberOfSamples; sample++) {
303
304 #if defined(WITH_OPENMP)
305 #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
306 #endif
307 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
308
309 // Copy data from one side of each face to the other
310 // Here we just copy data from one side of a given cell's face to the opposite face of the same cell
311 for (int d = 0; d < DIMENSIONS; d++) {
312 // Copy inner negative face into outer positive face
313 std::copy_n(
314 faceData.QIn[2 * DIMENSIONS * cellIndex + d][1],
315 kernels::AderSolver::getBndFaceSize(),
316 faceData.QIn[2 * DIMENSIONS * cellIndex + d + DIMENSIONS][1]
317 );
318 std::copy_n(
319 faceData.QOut[2 * DIMENSIONS * cellIndex + d][1],
320 kernels::AderSolver::getBndFluxSize(),
321 faceData.QOut[2 * DIMENSIONS * cellIndex + d + DIMENSIONS][1]
322 );
323
324 // Copy inner positive face into outer negative face
325 std::copy_n(
326 faceData.QIn[2 * DIMENSIONS * cellIndex + d + DIMENSIONS][0],
327 kernels::AderSolver::getBndFaceSize(),
328 faceData.QIn[2 * DIMENSIONS * cellIndex + d][0]
329 );
330 std::copy_n(
331 faceData.QOut[2 * DIMENSIONS * cellIndex + d + DIMENSIONS][0],
332 kernels::AderSolver::getBndFluxSize(),
333 faceData.QOut[2 * DIMENSIONS * cellIndex + d][0]
334 );
335 }
336 }
337
338 tarch::timing::Watch watchKernelLaunch("::runBenchmarks", "assessKernel(...)", false);
339
340 #if defined(WITH_OPENMP)
341 #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
342 #endif
343 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
344 cellData.maxEigenvalue[cellIndex] = 0.0;
346 &cellData,
347 &faceData,
348 cellIndex,
349 cellIndex * 2 * DIMENSIONS, // just passing the first of a cells faces, it can get the others by pointer
350 // arithmetic
352 );
353 }
354
355 watchKernelLaunch.stop();
356 timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
357 }
358
359
360 reportRuntime("variant 4", timingComputeKernel, timingKernelLaunch, numberOfCells, numberOfThreads, _log);
361 // allocateAndStoreOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
362 // validateOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
363
364 #if defined(WITH_OPENMP)
365 }//threadIndex
366 #endif
367
368 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
369 tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
370 tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);
371 }
372 for (int faceIndex = 0; faceIndex < 2 * DIMENSIONS * numberOfCells; faceIndex++) {
373 tarch::freeMemory(faceData.QIn[faceIndex][0], tarch::MemoryLocation::Heap);
374 tarch::freeMemory(faceData.QIn[faceIndex][1], tarch::MemoryLocation::Heap);
375 tarch::freeMemory(faceData.QOut[faceIndex][0], tarch::MemoryLocation::Heap);
376 tarch::freeMemory(faceData.QOut[faceIndex][1], tarch::MemoryLocation::Heap);
377 }
378}
void initialTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition Variant4.cpp:42
void runKernels(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition Variant4.cpp:100
constexpr double timeStamp
const tarch::la::Vector< DIMENSIONS, double > cellCenter
const tarch::la::Vector< DIMENSIONS, double > cellSize
constexpr double timeStepSize
tarch::timing::Measurement timingComputeKernel
constexpr int NumberOfInputEntriesPerCell
Definition Utils.h:14
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.
Definition Utils.h:68
void initInputData(SolverPrecision *Q, const tarch::la::Vector< DIMENSIONS, double > CellCenter, const tarch::la::Vector< DIMENSIONS, double > CellSize)
Set input data.
Definition Utils.h:30
tarch::logging::Log _log
This is variant 4 of the fused kernels.
void runBenchmarks(int numberOfCells, double timeStamp, double timeStepSize, const tarch::la::Vector< DIMENSIONS, double > cellCenter, const tarch::la::Vector< DIMENSIONS, double > cellSize)
Definition Variant4.cpp:223
Represents the sides of one face, with 2 sides (left and right) to a face For ADER QIn will contain t...
Definition LRFaceData.h:20
tarch::la::Vector< DIMENSIONS, double > * faceSize
Definition LRFaceData.h:27
inType *(* QIn)[2]
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
Definition LRFaceData.h:25
tarch::la::Vector< DIMENSIONS, double > * faceCentre
Definition LRFaceData.h:26
outType *(* QOut)[2]
Out values.
Definition LRFaceData.h:60