Merge pull request #27561 from loganharbour/apptainer_hpc

New HPC scheduler
idaholab · Oct 28, 2024 · f5a01db · f5a01db
2 parents e96644b + 345144f
commit f5a01db
Show file tree

Hide file tree

Showing 120 changed files with 4,224 additions and 2,767 deletions.
diff --git a/.gitignore b/.gitignore
@@ -326,3 +326,14 @@ share/
 /modules/misc/misc.yaml
 /tutorials/tutorial01_app_development/*/babbler.yaml
 /tutorials/darcy_thermo_mech/*/darcy_thermo_mech.yaml
+
+# test harness --sep-files output
+*.runner_run_out.txt
+*.runner_out.txt
+*.tester_out.txt
+*.job_out.txt
+
+# test harness hpc output
+*.hpc_out.txt
+*.hpc_result
+*.hpc_submit
diff --git a/CODEOWNERS b/CODEOWNERS
@@ -77,6 +77,7 @@
 
 /python/MooseDocs @cticenhour
 /python/moosesqa @cticenhour
+/python/TestHarness @loganharbour @milljm
 
 /scripts/hpc_proxy.pac @loganharbour
 /scripts/configure_petsc.sh @cticenhour @milljm @loganharbour

diff --git a/framework/app.mk b/framework/app.mk
@@ -498,6 +498,17 @@ install_data_%:
 	@mkdir -p $($@_dst)
 	@cp -r $($@_src) $($@_dst)
 
+# Register this application's TestHarness testers for installation, but only
+# when the application actually provides a scripts/TestHarness/testers directory.
+# The source/destination are stored in variables named after the install target
+# so that the generic install_tester_% rule below can look them up via $@.
+ifneq ($(wildcard $(APPLICATION_DIR)/scripts/TestHarness/testers),)
+install_tester_$(APPLICATION_NAME)_src := $(APPLICATION_DIR)/scripts/TestHarness/testers
+install_tester_$(APPLICATION_NAME)_dst := $(share_install_dir)/scripts/TestHarness
+install_testers:: install_tester_$(APPLICATION_NAME)
+endif
+
+# Copy the testers directory named by $(<target>_src) into $(<target>_dst),
+# creating the destination first. Paths are quoted so that an install prefix
+# containing spaces or glob characters is not split by the shell.
+install_tester_%:
+	@echo "Installing TestHarness testers $($@_dst)..."
+	@mkdir -p "$($@_dst)"
+	@cp -r "$($@_src)" "$($@_dst)"
+
 $(copy_input_targets):
 	@$(eval kv := $(subst ->, ,$(subst target_$(APPLICATION_NAME)_,,$@)))
 	@$(eval source_dir := $(word 1, $(kv)))

diff --git a/framework/moose.mk b/framework/moose.mk
@@ -484,12 +484,15 @@ moose_share_dir = $(share_dir)/moose
 python_install_dir = $(moose_share_dir)/python
 bin_install_dir = $(PREFIX)/bin
 
-install: all install_all_libs install_bin install_harness install_exodiff install_adreal_monolith install_hit install_data
+install: all install_all_libs install_bin install_harness install_exodiff install_adreal_monolith install_hit install_data install_testers
 
 install_data::
 	@mkdir -p $(moose_share_dir)
 	@cp -a $(FRAMEWORK_DIR)/data $(moose_share_dir)/
 
+# Base no-op rule so that `install` can always depend on install_testers, even
+# when no application adds its own install_testers:: prerequisites.
+# The recipe line must start with a hard TAB; with spaces, make parses `@:` as
+# a separate rule for a target named `@` instead of as this rule's recipe.
+install_testers::
+	@:
+
 install_adreal_monolith: ADRealMonolithic.h
 	@ mkdir -p $(moose_include_dir)
 	@cp -f $< $(moose_include_dir)/

diff --git a/modules/doc/content/application_development/index.md b/modules/doc/content/application_development/index.md
@@ -20,8 +20,6 @@ These documentation pages are meant to be used by developers who are developing
 
 [Test System](/test_system.md) - How to create/maintain tests for your application
 
-[Performance Benchmarking](/performance_benchmarking.md) - How to perform benchmarking
-
 [Profiling](/profiling.md) - How to profile your application in order to determine what functions are hogging compute time.
 
 [Code Coverage](/coverage.md) - How to add automatic code coverage to your application, and use it in your development workflow

diff --git a/modules/doc/content/application_development/performance_benchmarking.md b/modules/doc/content/application_development/performance_benchmarking.md
@@ -1,165 +1,4 @@
 # Performance Benchmarking
 
-Utilities for doing performance benchmarking of MOOSE-based applications are included in the main
-MOOSE repository.  These utilities provide functionality for benchmarking and tracking MOOSE
-performance.  They can be used to run benchmarks, generate trend visualizations, and look at stats
-comparing benchmarks between various revisions.  The following sections describe how to setup a
-benchmark machine and use it to run benchmarks and visualize results.
-
-## Tuning a Benchmarking Machine
-
-In order to obtain accurate results, you need to run the benchmark process(es)
-as close to isolated as possible.  On a linux system, you should e.g. use cpu
-isolation via setting kernel boot parameters:
-
-```text
-isolcpus=[n] rcu_nocbs=[n]
-```
-
-in your boot loader (e.g. grub).  The benchmarking tools/scripts in MOOSE should automatically
-detect CPU isolation on Linux and schedule benchmark jobs to those CPUs. You should also disable
-any turbo functionality.  For example on `intel_pstate` driver cpus:
-
-```text
-$ echo "1" > /sys/devices/system/cpu/intel_pstate/no_turbo
-```
-
-You will also want to turn off any hyperthreading for cores you use for benchmarking.  You can do
-this in the bios or by something like:
-
-```text
-$ echo "0" > /sys/devices/system/cpu/cpu[n]/online
-```
-
-for each hyperthread core you want running - you can look in `/proc/cpuinfo` for pairs of cpus
-that have the same core id turning off one of the pair.  These will need to be done on every boot.
-You can use the sysfsutils package and its `/etc/sysfs.conf` configuration file to do this
-persistently on boot - i.e.:
-
-```text
-devices/system/cpu/intel_pstate/no_turbo = 1
-devices/system/cpu/cpu3/online = 0
-devices/system/cpu/cpu5/online = 0
-```
-
-## Test Harness Benchmarks
-
-Benchmarks can be run through the test harness (i.e.  using the `run_tests` script) by doing
-e.g. `./run_tests --run speedtests`.  When this is done, the test harness looks for test spec
-files named `speedtests` just like the `tests` files that contain regular moose test details.
-The format for these files is:
-
-```text
-[Benchmarks]
-    [benchmark-name]
-        type = SpeedTest
-        input = input-file-name.i
-        cli_args = '--an-arg=1 a/hit/format/cli/arg=foo'
-        # optional:
-        min_runs = 15 # default 40
-        max_runs = 100 # default 400
-        cumulative_dur = 100 # default 60 sec
-    []
-
-    [./benchmark2-name]
-        type = SpeedTest
-        input = another-input-file-name.i
-        cli_args = 'some/cli/arg=bar'
-    []
-
-    # ...
-[]
-```
-
-After being run, benchmark data are stored in a sqlite database (default name
-`speedtests.sqlite`).  When the test harness is run without the `--run speedtests` flag, tests
-described in `speedtests` files are run in *check-only* mode where moose just checks that their
-input files are well-formed and parse correctly without actually running them.
-
-
-## Manual/Direct Benchmarks
-
-The `[moose-repo]/scripts/benchmark.py` script can be used to manually list and directly run benchmarks without the
-test harness (for hacking, debugging, etc.).  To do this, the script reads a `bench.list` text
-file that specifies which input files should be run and corresponding (benchmark) names for them
-along with any optional arguments.  The `bench.list` file has the following format:
-
-```text
-[benchmarks]
-    [./simple_diffusion_refine3]
-        binary = test/moose_test-opt
-        input = test/tests/kernels/simple_diffusion/simple_diffusion.i
-        cli_args = 'Mesh/uniform_refine=3'
-    [../]
-    [./simple_diffusion_refine4]
-        binary = test/moose_test-opt
-        input = test/tests/kernels/simple_diffusion/simple_diffusion.i
-        cli_args = 'Mesh/uniform_refine=4'
-    [../]
-    [./simple_diffusion_ref5]
-        binary = test/moose_test-opt
-        input = test/tests/kernels/simple_diffusion/simple_diffusion.i
-        cli_args = 'Mesh/uniform_refine=5'
-    [../]
-    # ... add as many as you want
-[]
-```
-
-To run the manual benchmarks directly, do this:
-
-```text
-$ ./scripts/benchmark.py --run
-```
-
-When benchmarks are run, the binaries specified in `bench.list` must already exist.  Benchmark
-data are then stored in a sqlite database (default name `speedtests.sqlite`).  You can specify
-the minimum number of runs for each benchmark problem/simulation with the `--min-runs` (default
-10).  Each benchmark will be run as many times as possible within 1 minute (customizable via the
-`--cum-dur` flag) or the specified minimum number of times (whichever is larger). 
-
-## Analyzing Results
-
-Regardless of how you ran the benchmarks (either by this script or using the test harness), MOOSE
-revisions with available benchmark data can be listed (from the database) by running:
-
-```text
-$ ./benchmark.py --list-revs
-44d2f3434b3346dc14fc9e86aa99ec433c1bbf10	2016-09-07 19:36:16
-86ced0d0c959c9bdc59497f0bc9324c5cdcd7e8f	2016-09-08 09:29:17
-447b455f1e2d8eda649468ed03ef792504d4b467	2016-09-08 09:43:56
-...
-```
-
-To look at stats comparing benchmark data from two revisions, run:
-
-```text
-$ ./benchmark.py # defaults to using the most recent two revisions of benchmark data
--------------------------------- 871c98630c98 to 38bb6f5ebe5f --------------------------------
-          benchmark               old (sec/run)     new (sec/run)    speedup (pvalue,nsamples)
-----------------------------------------------------------------------------------------------
-    simple diffusion (refine3):      0.408034          0.408034          ~   (p=0.996 n=36+36)
-
-    simple diffusion (refine4):      1.554724          1.561682          ~   (p=0.571 n=10+10)
-    simple diffusion (refine5):      6.592326          6.592326          ~   (p=0.882 n=4+4)
-----------------------------------------------------------------------------------------------
-
-$ ./benchmark.py -old 44d2f34 -new 447b455 # or specify revisions to compare manually
-------------------------------------- 44d2f34 to 447b455 -------------------------------------
-          benchmark               old (sec/run)     new (sec/run)    speedup (pvalue,nsamples)
-----------------------------------------------------------------------------------------------
-    simple diffusion (refine3):      0.416574          0.411435        -1.2% (p=0.000 n=37+37)
-    simple diffusion (refine4):      1.554724          1.497379        -3.7% (p=0.000 n=10+11)
-    simple diffusion (refine5):      6.553244          6.360004        -2.9% (p=0.030 n=4+4)
-----------------------------------------------------------------------------------------------
-```
-
-To generate visualizations, run:
-
-```text
-$ ./scripts/benchmark.py --trends
-```
-
-This will generate an svg box plot for each benchmark over time/revision in a `trends`
-subdirectory.  An `index.html` file is also generated that embeds all the svg plots for
-convenient viewing all together in a browser.
-
+!alert error title=Removed
+This content has been removed.
diff --git a/...ng_started/examples_and_tutorials/tutorial01_app_development/step07_parallel.md b/...ng_started/examples_and_tutorials/tutorial01_app_development/step07_parallel.md
@@ -69,8 +69,6 @@ There is an entire field of science about [!ac](HPC) and massively parallel proc
 !alert tip title=Try to target 20,000 [!ac](DOFs)-per-process.
 MOOSE developers tend to agree that 20,000 is the ideal number of [!ac](DOFs) that a single process may be responsible for. This value is reported as "`Num Local DOFs`" in the terminal printout at the beginning of every execution. There are, of course, some exceptions; if a problem exhibits speedup with less than 20,000 [!ac](DOFs)/process, then just use that.
 
-*For more information about application performance, please visit the [application_development/performance_benchmarking.md] page.*
-
 ## Demonstration
 
 To demonstrate the importance of parallel execution, the current Darcy pressure input file will be

diff --git a/modules/doc/content/infrastructure/index.md b/modules/doc/content/infrastructure/index.md
@@ -9,4 +9,3 @@ of MOOSE and MOOSE-based applications:
 - [Python Tools](python/index.md)
 - [Build System](/build_system.md)
 - [Test System](/test_system.md)
-- [Benchmarking](/performance_benchmarking.md)
diff --git a/modules/ray_tracing/test/tests/actions/add_raybc_action/tests b/modules/ray_tracing/test/tests/actions/add_raybc_action/tests
@@ -29,7 +29,7 @@
     [multiple]
       type = RunException
       input = 'add_raybc_action.i'
-      cli_args = 'RayBCs/active=multiple_studies "UserObjects/active=\'study another_study\'"'
+      cli_args = 'RayBCs/active=multiple_studies UserObjects/active="study another_study"'
       expect_err = "While constructing the NullRayBC 'multiple_studies', multiple RayTracingStudy objects were found."
       allow_test_objects = true
 

diff --git a/modules/ray_tracing/test/tests/actions/add_raykernel_action/tests b/modules/ray_tracing/test/tests/actions/add_raykernel_action/tests
@@ -29,7 +29,7 @@
     [multiple]
       type = RunException
       input = 'add_raykernel_action.i'
-      cli_args = 'RayKernels/active=multiple_studies "UserObjects/active=\'study another_study\'"'
+      cli_args = 'RayKernels/active=multiple_studies UserObjects/active="study another_study"'
       expect_err = "While constructing the NullRayKernel 'multiple_studies', multiple RayTracingStudy objects were found."
       allow_test_objects = true
 

diff --git a/modules/ray_tracing/test/tests/outputs/ray_tracing_mesh_output/tests b/modules/ray_tracing/test/tests/outputs/ray_tracing_mesh_output/tests
@@ -57,7 +57,7 @@
                 Mesh/Partitioner/nx=2
                 Mesh/Partitioner/ny=2
                 Outputs/rays/type=RayTracingNemesis
-                "Outputs/rays/output_properties=\'ray_id intersections pid processor_crossings trajectory_changes\'"
+                Outputs/rays/output_properties="ray_id intersections pid processor_crossings trajectory_changes"
                 Outputs/rays/file_base=nemesis_rays'
     exodiff = 'nemesis_rays.e.4.0 nemesis_rays.e.4.1 nemesis_rays.e.4.2 nemesis_rays.e.4.3'
     min_parallel = 4
@@ -74,7 +74,7 @@
                 Mesh/Partitioner/nx=2
                 Mesh/Partitioner/ny=2
                 Outputs/rays/type=RayTracingNemesis
-                "Outputs/rays/output_properties=\'intersections pid processor_crossings trajectory_changes\'"
+                Outputs/rays/output_properties="intersections pid processor_crossings trajectory_changes"
                 Outputs/rays/file_base=ray_mesh_output_transient_nemesis_rays'
     # Missing some files here because exodiff doesn't like diffing empty output,
     # which is the case for the early transient when not all procs are hit
@@ -107,7 +107,7 @@
       cli_args = 'Mesh/Partitioner/type=GridPartitioner
                   Mesh/Partitioner/ny=3
                   UserObjects/study/segments_on_cache_traces=false
-                  "Outputs/rays/output_properties=\'ray_id intersections pid processor_crossings trajectory_changes\'"
+                  Outputs/rays/output_properties="ray_id intersections pid processor_crossings trajectory_changes"
                   Outputs/rays/file_base=no_segments_rays'
       exodiff = 'no_segments_rays.e'
       min_parallel = 3
@@ -122,7 +122,7 @@
                   Mesh/Partitioner/ny=2
                   Outputs/rays/type=RayTracingNemesis
                   UserObjects/study/segments_on_cache_traces=false
-                  "Outputs/rays/output_properties=\'ray_id intersections pid processor_crossings trajectory_changes\'"
+                  Outputs/rays/output_properties="ray_id intersections pid processor_crossings trajectory_changes"
                   Outputs/rays/file_base=no_segments_nemesis_rays'
       exodiff = 'no_segments_nemesis_rays.e.2.0 no_segments_nemesis_rays.e.2.1'
       min_parallel = 2

diff --git a/modules/ray_tracing/test/tests/traceray/internal_sidesets/tests b/modules/ray_tracing/test/tests/traceray/internal_sidesets/tests
@@ -9,7 +9,7 @@
       input = 'internal_sidesets_1d.i'
       csvdiff = 'internal_sidesets_1d_kill_out.csv'
       cli_args = 'Outputs/file_base=internal_sidesets_1d_kill_out
-                  RayBCs/active=\'kill_internal\''
+                  RayBCs/active=kill_internal'
       allow_test_objects = true
 
       detail = 'one-dimensional meshes, '
@@ -19,7 +19,7 @@
       input = 'internal_sidesets_2d.i'
       csvdiff = 'internal_sidesets_2d_kill_out.csv'
       cli_args = 'Outputs/file_base=internal_sidesets_2d_kill_out
-                  RayBCs/active=\'kill_internal\''
+                  RayBCs/active=kill_internal'
       allow_test_objects = true
 
       detail = 'two-dimensional meshes, '
@@ -29,7 +29,7 @@
       input = 'internal_sidesets_3d.i'
       csvdiff = 'internal_sidesets_3d_kill_out.csv'
       cli_args = 'Outputs/file_base=internal_sidesets_3d_kill_out
-                  RayBCs/active=\'kill_internal\''
+                  RayBCs/active=kill_internal'
       allow_test_objects = true
 
       detail = 'and three-dimensional meshes.'
@@ -43,7 +43,7 @@
       input = 'internal_sidesets_1d.i'
       csvdiff = 'internal_sidesets_1d_reflect_out.csv'
       cli_args = 'Outputs/file_base=internal_sidesets_1d_reflect_out
-                  "RayBCs/active=\'kill_external reflect_internal\'"'
+                  RayBCs/active="kill_external reflect_internal"'
       allow_test_objects = true
 
       detail = 'one-dimensional meshes, '
@@ -54,7 +54,7 @@
       input = 'internal_sidesets_2d.i'
       csvdiff = 'internal_sidesets_2d_reflect_out.csv'
       cli_args = 'Outputs/file_base=internal_sidesets_2d_reflect_out
-                  "RayBCs/active=\'kill_external reflect_internal\'"'
+                  RayBCs/active="kill_external reflect_internal"'
       allow_test_objects = true
 
       detail = 'two-dimensional meshes, '
@@ -65,7 +65,7 @@
       input = 'internal_sidesets_3d.i'
       csvdiff = 'internal_sidesets_3d_reflect_out.csv'
       cli_args = 'Outputs/file_base=internal_sidesets_3d_reflect_out
-                  "RayBCs/active=\'kill_external reflect_internal\'"'
+                  RayBCs/active="kill_external reflect_internal"'
       allow_test_objects = true
 
       detail = 'and three-dimensional meshes.'
@@ -76,7 +76,7 @@
     type = RunException
     input = 'internal_sidesets_1d.i'
     cli_args = 'UserObjects/study/use_internal_sidesets=false
-                RayBCs/active=\'kill_internal\''
+                RayBCs/active=kill_internal'
     expect_err = 'RayBCs are defined on internal sidesets, but the study is not set to use internal sidesets during tracing.'
     allow_test_objects = true
 

diff --git a/modules/ray_tracing/test/tests/userobjects/ray_tracing_study/bc_create_ray/tests b/modules/ray_tracing/test/tests/userobjects/ray_tracing_study/bc_create_ray/tests
@@ -19,7 +19,7 @@
       allow_test_objects = true
       cli_args = 'Outputs/file_base=bc_create_ray_3d_out
                   Mesh/active=gmg_3d
-                  "RayBCs/active=\'kill_3d create_3d\'"'
+                  RayBCs/active="kill_3d create_3d"'
       detail = 'and in three-dimensional meshes.'
     []
   []

diff --git a/modules/thermal_hydraulics/test/tests/utils/logger/tests b/modules/thermal_hydraulics/test/tests/utils/logger/tests
@@ -14,7 +14,9 @@
   [errors]
     type = RunException
     input = 'test.i'
-    expect_err = "componentB: warning 2.*componentA: error 1.*componentA: error 2.*componentB: error 1.*componentB: error 2"
+    # We can't reliably check for the warnings first here because the ordering of the warning
+    # and the error is MPI implementation dependent
+    expect_err = "componentA: error 1.*componentA: error 2.*componentB: error 1.*componentB: error 2"
     allow_test_objects = true
     allow_warnings = true # Testing that warnings are emitted
     requirement = 'The system shall be able to output errors in a batch.'