10 #include "Constants.h"
11 #include "ccz4-main.h"
12 #include "exahype2/UnitTests.h"
13 #include "exahype2/UserInterface.h"
14 #include "observers/CreateGrid.h"
15 #include "observers/CreateGridAndConvergeLoadBalancing.h"
16 #include "observers/CreateGridButPostponeRefinement.h"
17 #include "observers/InitGrid.h"
18 #include "observers/PlotSolution.h"
19 #include "observers/CheckpointSolution.h"
20 #include "observers/TimeStep.h"
21 #include "peano4/peano4.h"
22 #include "peano4/UnitTests.h"
23 #include "repositories/DataRepository.h"
24 #include "repositories/SolverRepository.h"
25 #include "repositories/StepRepository.h"
26 #include "tarch/UnitTests.h"
27 #include "peano4/grid/Spacetree.h"
28 #include "peano4/parallel/SpacetreeSet.h"
29 #include "tarch/logging/Log.h"
30 #include "tarch/logging/LogFilter.h"
31 #include "tarch/logging/Statistics.h"
32 #include "tarch/multicore/Core.h"
33 #include "tarch/multicore/multicore.h"
34 #include "tarch/NonCriticalAssertions.h"
35 #include "tarch/tests/TreeTestCaseCollection.h"
36 #include "tarch/timing/Measurement.h"
37 #include "tarch/timing/Watch.h"
38 #include "toolbox/blockstructured/UnitTests.h"
39 #include "toolbox/loadbalancing/loadbalancing.h"
42 #if defined(USE_ADDITIONAL_MESH_TRAVERSAL)
43 #include "observers/AdditionalMeshTraversal.h"
// File-scope logger object, constructed with the name "::".
// NOTE(review): presumably the logInfo()/logError() macros used below resolve
// to whichever `_log` is in scope - confirm against tarch/logging/Log.h.
49 tarch::logging::Log
_log(
"::");
// Body of the routine that picks the next program step and reports whether the
// simulation should keep running (it returns continueToSolve).
// NOTE(review): the signature line is not part of this listing; judging from
// the trace name passed to every log call this is selectNextAlgorithmicStep() -
// confirm. Each statement start carries the line number of the original file,
// and gaps in that numbering mark lines that are missing here (closing
// brackets, `logInfo(` openers, `else` lines).
//
// Phase flags. They are function-local statics, so they keep their value from
// one call to the next.
67 static bool gridConstructed =
false;
68 static bool gridInitialised =
false;
69 static bool gridBalanced =
false;
// Thresholds that trigger the next plot/checkpoint. A snapshot is scheduled
// once the solvers' min or max time stamp reaches the matching threshold.
70 static double nextMaxPlotTimeStamp = FirstPlotTimeStamp;
71 static double nextMinPlotTimeStamp = FirstPlotTimeStamp;
72 static double nextMaxCheckpointTimeStamp = FirstCheckpointTimeStamp;
73 static double nextMinCheckpointTimeStamp = FirstCheckpointTimeStamp;
// Set when a plot/checkpoint step is selected and cleared when a time step is
// selected; read after the terminal time to decide on one final plot.
74 static bool haveJustWrittenSnapshot =
false;
// Remembers that the dump triggered by a non-critical assertion has already
// been scheduled, so it is scheduled only once.
75 static bool haveReceivedNoncriticialAssertion =
false;
// Chooses between CreateGridButPostponeRefinement (true) and CreateGrid
// (false) at the end of the grid construction branch.
76 static bool addGridSweepWithoutGridRefinementNext =
false;
// Mesh size recorded at the last observed refinement; starts at the largest
// double. NOTE(review): the line closing this initialiser is missing here.
77 static tarch::la::Vector<DIMENSIONS, double> minH = tarch::la::Vector<DIMENSIONS, double>(
78 std::numeric_limits<double>::max()
// Number of trees reported by the load balancer when it was last sampled.
80 static int globalNumberOfTrees = 0;
// Return value of this routine.
81 bool continueToSolve =
true;
// A non-critical assertion is seen for the first time: schedule a PlotSolution
// step and remember that this has been done.
83 if (tarch::hasNonCriticalAssertionBeenViolated() and not haveReceivedNoncriticialAssertion) {
84 peano4::parallel::Node::getInstance().setNextProgramStep(
85 repositories::StepRepository::toProgramStep(repositories::StepRepository::Steps::PlotSolution)
87 haveReceivedNoncriticialAssertion =
true;
89 "selectNextAlgorithmicStep()",
"non-critical assertion has been triggered in code. Dump final state and terminate"
91 }
// The violation was already handled by the branch above in an earlier call:
// stop the simulation.
else if (tarch::hasNonCriticalAssertionBeenViolated()) {
92 continueToSolve =
false;
93 }
// The grid is built but the load balancing has not settled yet.
else if (gridConstructed and not gridBalanced) {
94 if (not repositories::loadBalancer.isEnabled(
true) and not repositories::loadBalancer.hasSplitRecently()) {
95 logInfo(
"selectNextAlgorithmicStep()",
"all ranks have switched off their load balancing");
// NOTE(review): gridBalanced is never assigned in the visible lines;
// presumably it is set in the omitted lines 96-98 - confirm.
99 "selectNextAlgorithmicStep()",
"wait for load balancing to become stable: " << repositories::loadBalancer
103 peano4::parallel::Node::getInstance().setNextProgramStep(repositories::StepRepository::toProgramStep(
104 repositories::StepRepository::Steps::CreateGridAndConvergeLoadBalancing
106 }
// The grid is balanced but not yet initialised: schedule InitGrid once.
else if (gridBalanced and not gridInitialised) {
107 peano4::parallel::Node::getInstance().setNextProgramStep(
108 repositories::StepRepository::toProgramStep(repositories::StepRepository::Steps::InitGrid)
111 gridInitialised =
true;
112 }
// Grid construction is still in progress.
else if (not gridConstructed) {
// The reported mesh size has dropped below the recorded minH: store the new
// value and ask for a sweep that postpones further refinement.
113 if (tarch::la::max(peano4::parallel::SpacetreeSet::getInstance().getGridStatistics().getMinH()) < tarch::la::max(minH)) {
114 minH = peano4::parallel::SpacetreeSet::getInstance().getGridStatistics().getMinH();
116 "selectNextAlgorithmicStep()",
"mesh has refined, so reset minH=" << minH <<
" and postpone further refinement"
118 addGridSweepWithoutGridRefinementNext =
true;
119 }
// The global number of trees has grown since the last sample: again ask for
// a sweep without refinement and record the new tree count.
else if (repositories::loadBalancer.getGlobalNumberOfTrees() > globalNumberOfTrees) {
120 logInfo(
"selectNextAlgorithmicStep()",
"mesh has rebalanced recently, so postpone further refinement)");
121 addGridSweepWithoutGridRefinementNext =
true;
122 globalNumberOfTrees = repositories::loadBalancer.getGlobalNumberOfTrees();
// NOTE(review): the next two lines are fragments of one condition (more than
// five stationary sweeps; current step is CreateGrid). The `else if` opener
// and the operator joining them are missing from this listing.
125 peano4::parallel::SpacetreeSet::getInstance().getGridStatistics().getStationarySweeps()>5
129 repositories::StepRepository::toStepEnum( peano4::parallel::Node::getInstance().getCurrentProgramStep() ) == repositories::StepRepository::Steps::CreateGrid
132 "selectNextAlgorithmicStep()",
"grid has been stationary for quite some time. Terminate grid construction"
// Grid construction is declared finished here.
134 addGridSweepWithoutGridRefinementNext =
false;
135 gridConstructed =
true;
// Fallback (the opening `else` is missing from the listing): allow refinement
// in the next sweep and refresh the recorded tree count.
138 "selectNextAlgorithmicStep()",
139 "mesh rebalancing seems to be stationary, so study whether to refine mesh further in next sweep: "
140 << peano4::parallel::SpacetreeSet::getInstance().getGridStatistics().toString()
142 addGridSweepWithoutGridRefinementNext =
false;
143 globalNumberOfTrees = repositories::loadBalancer.getGlobalNumberOfTrees();
// Turn the decision above into the actual next program step.
147 if (addGridSweepWithoutGridRefinementNext) {
148 peano4::parallel::Node::getInstance().setNextProgramStep(repositories::StepRepository::toProgramStep(
149 repositories::StepRepository::Steps::CreateGridButPostponeRefinement
152 peano4::parallel::Node::getInstance().setNextProgramStep(
153 repositories::StepRepository::toProgramStep(repositories::StepRepository::Steps::CreateGrid)
157 continueToSolve =
true;
// Plot branch. It is taken only if plotting is switched on
// (TimeInBetweenPlots > 0), both time stamps are still below their terminal
// times, at least one plot threshold has been reached, and mayPlot() holds.
159 if (TimeInBetweenPlots > 0.0 and repositories::getMinTimeStamp() < MinTerminalTime and repositories::getMaxTimeStamp() < MaxTerminalTime and (repositories::getMinTimeStamp() >= nextMinPlotTimeStamp or repositories::getMaxTimeStamp() >= nextMaxPlotTimeStamp) and repositories::mayPlot()) {
// Advance whichever threshold has been reached by one plot interval.
160 if (repositories::getMinTimeStamp() >= nextMinPlotTimeStamp) {
161 nextMinPlotTimeStamp += TimeInBetweenPlots;
163 if (repositories::getMaxTimeStamp() >= nextMaxPlotTimeStamp) {
164 nextMaxPlotTimeStamp += TimeInBetweenPlots;
// If a threshold still lags behind the current time stamp after the bump,
// re-anchor it at the current time stamp plus one plot interval.
167 if (nextMinPlotTimeStamp < repositories::getMinTimeStamp()) {
169 "selectNextAlgorithmicStep()",
170 "code is asked to plot every dt="
171 << TimeInBetweenPlots <<
", but this seems to be less than the time step size of the solvers. "
172 <<
"So postpone next plot to t=" << (repositories::getMinTimeStamp() + TimeInBetweenPlots)
174 nextMinPlotTimeStamp = repositories::getMinTimeStamp() + TimeInBetweenPlots;
175 }
else if (nextMaxPlotTimeStamp < repositories::getMaxTimeStamp()) {
177 "selectNextAlgorithmicStep()",
178 "code is asked to plot every dt="
179 << TimeInBetweenPlots <<
", but this seems to be less than the time step size of the solvers. "
180 <<
"So postpone next plot to t=" << (repositories::getMaxTimeStamp() + TimeInBetweenPlots)
182 nextMaxPlotTimeStamp = repositories::getMaxTimeStamp() + TimeInBetweenPlots;
// Keep the max threshold at or above the min threshold.
185 nextMaxPlotTimeStamp = std::max(nextMaxPlotTimeStamp, nextMinPlotTimeStamp);
187 peano4::parallel::Node::getInstance().setNextProgramStep(
188 repositories::StepRepository::toProgramStep(repositories::StepRepository::Steps::PlotSolution)
190 haveJustWrittenSnapshot =
true;
191 continueToSolve =
true;
192 }
// Checkpoint branch. It mirrors the plot branch but uses the checkpoint
// interval and thresholds. It is also gated by repositories::mayPlot().
else if (TimeInBetweenCheckpoints > 0.0 and repositories::getMinTimeStamp() < MinTerminalTime and repositories::getMaxTimeStamp() < MaxTerminalTime and (repositories::getMinTimeStamp() >= nextMinCheckpointTimeStamp or repositories::getMaxTimeStamp() >= nextMaxCheckpointTimeStamp) and repositories::mayPlot()) {
193 if (repositories::getMinTimeStamp() >= nextMinCheckpointTimeStamp) {
194 nextMinCheckpointTimeStamp += TimeInBetweenCheckpoints;
196 if (repositories::getMaxTimeStamp() >= nextMaxCheckpointTimeStamp) {
197 nextMaxCheckpointTimeStamp += TimeInBetweenCheckpoints;
// Same re-anchoring as in the plot branch, applied to the checkpoint thresholds.
200 if (nextMinCheckpointTimeStamp < repositories::getMinTimeStamp()) {
202 "selectNextAlgorithmicStep()",
203 "code is asked to Checkpoint every dt="
204 << TimeInBetweenCheckpoints <<
", but this seems to be less than the time step size of the solvers. "
205 <<
"So postpone next Checkpoint to t=" << (repositories::getMinTimeStamp() + TimeInBetweenCheckpoints)
207 nextMinCheckpointTimeStamp = repositories::getMinTimeStamp() + TimeInBetweenCheckpoints;
208 }
else if (nextMaxCheckpointTimeStamp < repositories::getMaxTimeStamp()) {
210 "selectNextAlgorithmicStep()",
211 "code is asked to Checkpoint every dt="
212 << TimeInBetweenCheckpoints <<
", but this seems to be less than the time step size of the solvers. "
213 <<
"So postpone next Checkpoint to t=" << (repositories::getMaxTimeStamp() + TimeInBetweenCheckpoints)
215 nextMaxCheckpointTimeStamp = repositories::getMaxTimeStamp() + TimeInBetweenCheckpoints;
218 nextMaxCheckpointTimeStamp = std::max(nextMaxCheckpointTimeStamp, nextMinCheckpointTimeStamp);
220 peano4::parallel::Node::getInstance().setNextProgramStep(
221 repositories::StepRepository::toProgramStep(repositories::StepRepository::Steps::CheckpointSolution)
223 haveJustWrittenSnapshot =
true;
224 continueToSolve =
true;
225 }
// No snapshot is due and the terminal times are not reached: run a time step.
else if (repositories::getMinTimeStamp() < MinTerminalTime and repositories::getMaxTimeStamp() < MaxTerminalTime) {
226 peano4::parallel::Node::getInstance().setNextProgramStep(
227 repositories::StepRepository::toProgramStep(repositories::StepRepository::Steps::TimeStep)
229 continueToSolve =
true;
230 haveJustWrittenSnapshot =
false;
// Remaining case (the `else` opener is missing from the listing). If the last
// selected step was not a snapshot and plotting is on, schedule one more
// PlotSolution step and push both plot thresholds to the largest double.
232 if (not haveJustWrittenSnapshot and TimeInBetweenPlots > 0.0 and repositories::mayPlot()) {
233 peano4::parallel::Node::getInstance().setNextProgramStep(
234 repositories::StepRepository::toProgramStep(repositories::StepRepository::Steps::PlotSolution)
236 continueToSolve =
true;
237 haveJustWrittenSnapshot =
true;
238 nextMinPlotTimeStamp = std::numeric_limits<double>::max();
239 nextMaxPlotTimeStamp = std::numeric_limits<double>::max();
240 }
// A final plot is wanted but mayPlot() is false: keep running without
// scheduling a step here.
// NOTE(review): presumably this waits until plotting becomes possible - confirm.
else if (not haveJustWrittenSnapshot and TimeInBetweenPlots > 0.0 and not repositories::mayPlot()) {
241 continueToSolve =
true;
// Otherwise the run is over.
243 continueToSolve =
false;
248 return continueToSolve;
// Body of the routine that executes the currently selected program step.
// NOTE(review): the signature line, the `switch` statement, the `break`s and
// most closing braces are not part of this listing (see the gaps in the
// embedded line numbers); the trace name "step()" used in the log calls
// suggests this is step() - confirm.
//
// Fetch the step chosen for this sweep and convert it to the Steps enum.
253 int stepIdentifier = peano4::parallel::Node::getInstance().getCurrentProgramStep();
254 auto stepName = repositories::StepRepository::toStepEnum(stepIdentifier);
// Function-local logger constructed with an empty name. It has the same
// identifier as the file-scope `_log` and therefore hides it in this body.
256 static tarch::logging::Log
_log(
"");
// The start message is written on the global master only.
259 if (tarch::mpi::Rank::getInstance().isGlobalMaster())
261 logInfo(
"step()",
"Starting AlgorithmicStep [" << repositories::StepRepository::toString(stepName)<<
"]" );
263 static tarch::timing::Watch watch(
"::",
"step()",
false);
// Cell-count threshold used by the load-balancing convergence phase. It is
// static, so it persists between calls.
265 static int creepingNumberOfLocalCells = 0;
// Grid sweep with the CreateGridButPostponeRefinement observer, bracketed by
// the start/finish grid-construction hooks.
268 case repositories::StepRepository::Steps::CreateGridButPostponeRefinement: {
269 tarch::logging::LogFilter::getInstance().switchProgramPhase(
"create-grid-but-postpone-refinement");
271 repositories::startGridConstructionStep();
273 observers::CreateGridButPostponeRefinement observer;
275 peano4::parallel::SpacetreeSet::getInstance().traverse(observer);
279 repositories::finishGridConstructionStep();
// Grid sweep with the CreateGrid observer.
281 case repositories::StepRepository::Steps::CreateGrid: {
282 tarch::logging::LogFilter::getInstance().switchProgramPhase(
"create-grid");
284 repositories::startGridConstructionStep();
286 observers::CreateGrid observer;
288 peano4::parallel::SpacetreeSet::getInstance().traverse(observer);
292 repositories::finishGridConstructionStep();
// Re-seed the threshold: weight of the heaviest local tree plus three times
// the number of threads.
296 creepingNumberOfLocalCells = ::toolbox::loadbalancing::getWeightOfHeaviestLocalSpacetree()
297 + tarch::multicore::Core::getInstance().getNumberOfThreads() * 3;
// Grid sweep without refinement that switches the load balancing off once the
// heaviest local tree has reached the threshold.
299 case repositories::StepRepository::Steps::CreateGridAndConvergeLoadBalancing: {
// The heaviest tree is more than one above the threshold: re-seed the
// threshold as in the CreateGrid case.
300 if (creepingNumberOfLocalCells < ::toolbox::loadbalancing::getWeightOfHeaviestLocalSpacetree() - 1) {
303 "it seems the grid has just refined before we switched to the phase where we make the load balancing converge. Wait for a few iterations more to give load balancing chance to catch up"
305 creepingNumberOfLocalCells = ::toolbox::loadbalancing::getWeightOfHeaviestLocalSpacetree()
306 + tarch::multicore::Core::getInstance().getNumberOfThreads() * 3;
309 tarch::logging::LogFilter::getInstance().switchProgramPhase(
"create-grid-and-converge-load-balancing");
// A negative heaviest-tree weight is treated as a degenerated rank; load
// balancing is disabled if it is currently enabled.
312 if (::toolbox::loadbalancing::getWeightOfHeaviestLocalSpacetree() < 0 and repositories::loadBalancer.isEnabled(
false)) {
313 logInfo(
"step()",
"rank is degenerated so disable load balancing temporarily");
314 repositories::loadBalancer.enable(
false);
// NOTE(review): the next two lines are fragments of one condition (heaviest
// weight has reached the threshold; load balancing still enabled). The opener
// and the operator joining them are missing from this listing.
317 ::toolbox::loadbalancing::getWeightOfHeaviestLocalSpacetree() >= creepingNumberOfLocalCells
319 repositories::loadBalancer.isEnabled(
false)
323 "grid construction and decomposition on this rank seem to be stable as we have around "
324 << creepingNumberOfLocalCells <<
" local cells in the heaviest tree. Disable load balancing temporarily"
326 repositories::loadBalancer.enable(
false);
// This phase runs the sweep with the CreateGridButPostponeRefinement observer.
329 repositories::startGridConstructionStep();
331 observers::CreateGridButPostponeRefinement observer;
333 peano4::parallel::SpacetreeSet::getInstance().traverse(observer);
337 repositories::finishGridConstructionStep();
// NOTE(review): fragments of a second condition (weight at or below the
// threshold; no recent split; load balancing enabled). The connecting lines
// are missing from this listing.
340 ::toolbox::loadbalancing::getWeightOfHeaviestLocalSpacetree() <= creepingNumberOfLocalCells
342 not repositories::loadBalancer.hasSplitRecently()
344 repositories::loadBalancer.isEnabled(
false)
348 "have to decrement local cell counter "
349 << creepingNumberOfLocalCells <<
" as maximum weight is "
350 << ::toolbox::loadbalancing::getWeightOfHeaviestLocalSpacetree()
// The threshold is replaced by an expression built from the sum of the old
// threshold and the current heaviest weight.
// NOTE(review): the tail of the expression is missing from this listing.
352 creepingNumberOfLocalCells = (creepingNumberOfLocalCells
353 + ::toolbox::loadbalancing::getWeightOfHeaviestLocalSpacetree())
// Initialisation sweep; load balancing is switched off first.
357 case repositories::StepRepository::Steps::InitGrid: {
358 tarch::logging::LogFilter::getInstance().switchProgramPhase(
"init-grid");
359 repositories::loadBalancer.enable(
false);
361 repositories::startGridInitialisationStep();
363 observers::InitGrid observer;
364 observers::InitGrid::prepareTraversal();
366 peano4::parallel::SpacetreeSet::getInstance().traverse(observer);
368 observers::InitGrid::unprepareTraversal();
371 repositories::finishGridInitialisationStep();
// Plot sweep. The current time stamps and time step sizes are sampled and
// handed to startPlottingStep() before the traversal.
373 case repositories::StepRepository::Steps::PlotSolution: {
374 tarch::logging::LogFilter::getInstance().switchProgramPhase(
"plot-solution");
375 const double minTimeStamp = repositories::getMinTimeStamp();
376 const double maxTimeStamp = repositories::getMaxTimeStamp();
377 const double minTimeStepSize = repositories::getMinTimeStepSize();
378 const double maxTimeStepSize = repositories::getMaxTimeStepSize();
380 repositories::startPlottingStep(minTimeStamp, maxTimeStamp, minTimeStepSize, maxTimeStepSize);
382 observers::PlotSolution observer;
383 observers::PlotSolution::prepareTraversal();
385 peano4::parallel::SpacetreeSet::getInstance().traverse(observer);
387 observers::PlotSolution::unprepareTraversal();
390 repositories::finishPlottingStep();
// Checkpoint sweep. It uses the CheckpointSolution observer but the same
// startPlottingStep()/finishPlottingStep() hooks as the plot case.
// NOTE(review): presumably there is no dedicated checkpoint hook - confirm.
392 case repositories::StepRepository::Steps::CheckpointSolution: {
393 tarch::logging::LogFilter::getInstance().switchProgramPhase(
"Checkpoint-solution");
394 const double minTimeStamp = repositories::getMinTimeStamp();
395 const double maxTimeStamp = repositories::getMaxTimeStamp();
396 const double minTimeStepSize = repositories::getMinTimeStepSize();
397 const double maxTimeStepSize = repositories::getMaxTimeStepSize();
399 repositories::startPlottingStep(minTimeStamp, maxTimeStamp, minTimeStepSize, maxTimeStepSize);
401 observers::CheckpointSolution observer;
402 observers::CheckpointSolution::prepareTraversal();
404 peano4::parallel::SpacetreeSet::getInstance().traverse(observer);
406 observers::CheckpointSolution::unprepareTraversal();
409 repositories::finishPlottingStep();
// Time step sweep. Load balancing is switched off if it is still enabled.
411 case repositories::StepRepository::Steps::TimeStep: {
412 tarch::logging::LogFilter::getInstance().switchProgramPhase(
"time-step");
413 if (repositories::loadBalancer.isEnabled(
false)) {
414 logInfo(
"step()",
"disable load balancing throughout initialisation (to be removed in later releases)");
415 repositories::loadBalancer.enable(
false);
418 const double minTimeStamp = repositories::getMinTimeStamp();
419 const double maxTimeStamp = repositories::getMaxTimeStamp();
420 const double minTimeStepSize = repositories::getMinTimeStepSize();
421 const double maxTimeStepSize = repositories::getMaxTimeStepSize();
// NOTE(review): the two mesh sizes are not used by any visible line of this
// case; they may be consumed by an omitted line - confirm.
422 const double minMeshSize = repositories::getMinMeshSize();
423 const double maxMeshSize = repositories::getMaxMeshSize();
425 repositories::startTimeStep(minTimeStamp, maxTimeStamp, minTimeStepSize, maxTimeStepSize);
427 observers::TimeStep observer;
428 observers::TimeStep::prepareTraversal();
430 peano4::parallel::SpacetreeSet::getInstance().traverse(observer);
432 observers::TimeStep::unprepareTraversal();
435 repositories::finishTimeStep();
// Optional extra sweep, compiled in only if USE_ADDITIONAL_MESH_TRAVERSAL is
// defined. The solvers are suspended for this one sweep.
438 #if defined(USE_ADDITIONAL_MESH_TRAVERSAL)
439 case repositories::StepRepository::Steps::AdditionalMeshTraversal:
441 tarch::logging::LogFilter::getInstance().switchProgramPhase(
"additional-MeshTraversal" );
443 repositories::suspendSolversForOneGridSweep();
444 observers::AdditionalMeshTraversal observer;
445 observers::AdditionalMeshTraversal::prepareTraversal();
446 peano4::parallel::SpacetreeSet::getInstance().traverse(observer);
447 observers::AdditionalMeshTraversal::unprepareTraversal();
// NOTE(review): the body of the Undef case is not visible in this listing.
452 case repositories::StepRepository::Steps::Undef:
// The finish message, including the CCZ4 solver state, is written on the
// global master only.
459 if (tarch::mpi::Rank::getInstance().isGlobalMaster())
461 logInfo(
"step()",
"Finishing [" << repositories::StepRepository::toString(stepName) <<
"] Current SolverState [" << AbstractCCZ4::toString(repositories::instanceOfCCZ4.getSolverState())<<
"]" );
// Program entry point: initialises the environment, assembles the unit test
// collection, runs the simulation and shuts everything down again.
// NOTE(review): this listing omits a number of lines of the function (see the
// gaps in the embedded line numbers), among them the main loop of the global
// master and several closing braces.
//
// @param argc Argument count, passed by address to peano4::initParallelEnvironment().
// @param argv Argument vector, passed by address to peano4::initParallelEnvironment().
// @return ExitCodeSuccess on a regular run, or ExitCodeInvalidBuild if several
//         ranks are used with at most one thread.
464 int main(
int argc,
char** argv) {
// Exit codes of the executable.
465 constexpr
int ExitCodeSuccess = 0;
466 constexpr
int ExitCodeUnitTestsFailed = 1;
467 constexpr
int ExitCodeInvalidArguments = 2;
468 constexpr
int ExitCodeInvalidBuild = 3;
470 static tarch::timing::Watch watch(
"::",
"main()",
false);
// Start-up sequence. The shutdown calls at the end of main() undo these in
// reverse order.
472 peano4::initParallelEnvironment(&argc, &argv);
475 repositories::initLogFilters();
477 tarch::initNonCriticalAssertionEnvironment();
478 peano4::fillLookupTables();
480 peano4::initSingletons(DomainOffset, DomainSize, PeriodicBC);
482 repositories::initSharedMemoryAndGPUEnvironment();
// More than one rank combined with at most one thread is rejected.
// NOTE(review): this return path runs none of the shutdown calls in the
// visible lines - confirm that this is intended.
484 if (tarch::mpi::Rank::getInstance().getNumberOfRanks() > 1 and tarch::multicore::Core::getInstance().getNumberOfThreads() <= 1) {
485 logError(
"main()",
"MPI runs without multithreading are not supported currently.");
486 return ExitCodeInvalidBuild;
489 repositories::DataRepository::initDatatypes();
// Collect the unit tests of Peano, tarch, the blockstructured toolbox and
// ExaHyPE 2 in one heap-allocated collection.
// NOTE(review): neither the call that runs the tests nor a matching delete is
// visible in this listing - confirm against the full file.
492 tarch::tests::TreeTestCaseCollection* unitTests =
new tarch::tests::TreeTestCaseCollection();
493 unitTests->addTestCase(peano4::getUnitTests());
494 unitTests->addTestCase(tarch::getUnitTests());
495 unitTests->addTestCase(toolbox::blockstructured::getUnitTests());
496 unitTests->addTestCase(exahype2::getUnitTests());
// Any reported test error aborts the run with ExitCodeUnitTestsFailed.
498 if (unitTests->getNumberOfErrors() != 0) {
499 logError(
"main()",
"unit tests failed. Quit.");
500 tarch::mpi::Rank::abort(ExitCodeUnitTestsFailed);
505 repositories::startSimulation();
507 tarch::logging::Statistics::getInstance().clear();
509 #if defined(WITH_OPENMP)
// A rank is either the global master or a compute node; the second flag is the
// negation of the first.
516 const bool isGlobalMaster = tarch::mpi::Rank::getInstance().isGlobalMaster();
517 const bool isPeanoComputeNode = not tarch::mpi::Rank::getInstance().isGlobalMaster();
// Global master branch.
// NOTE(review): the loop that drives the simulation is missing from this
// listing; only the timing report and the final message remain.
519 if (isGlobalMaster) {
526 logInfo(
"main()",
"time per mesh sweep (current/average): " << std::fixed << std::setprecision(2) << watch.getCalendarTime() <<
" / " <<
timePerMeshSweepMeasurement.getValue() );
529 logInfo(
"main()",
"terminated successfully");
548 }
// Compute node branch: loop for as long as the node is told to continue.
// NOTE(review): the loop body is missing from this listing.
else if (isPeanoComputeNode) {
549 while (peano4::parallel::Node::getInstance().continueToRun()) {
553 #if defined(WITH_OPENMP)
// Write the collected statistics, then tear down in reverse start-up order.
558 tarch::logging::Statistics::getInstance().writeToCSV();
560 repositories::finishSimulation();
562 peano4::shutdownSingletons();
563 repositories::DataRepository::shutdownDatatypes();
564 tarch::shutdownNonCriticalAssertionEnvironment();
565 peano4::shutdownParallelEnvironment();
567 return ExitCodeSuccess;
tarch::timing::Measurement gridConstructionMeasurement
int main(int argc, char **argv)
tarch::timing::Measurement timePerMeshSweepMeasurement
tarch::timing::Measurement timeStepMeasurement
bool selectNextAlgorithmicStep()
Decide which step to run next.
tarch::timing::Measurement plotMeasurement
tarch::logging::Log _log("::")