From a320d619ec46cf164372285bbf834aacc63aa15e Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Tue, 22 Oct 2024 07:37:27 -0600 Subject: [PATCH] undo merge and rebase --- CIME/SystemTests/err.py | 13 ++-- CIME/case/case.py | 96 +++++--------------------- CIME/case/case_st_archive.py | 127 +++++++++++++++++++++-------------- 3 files changed, 98 insertions(+), 138 deletions(-) diff --git a/CIME/SystemTests/err.py b/CIME/SystemTests/err.py index 355ddd5d390..276ceb634aa 100644 --- a/CIME/SystemTests/err.py +++ b/CIME/SystemTests/err.py @@ -5,7 +5,7 @@ import glob, os from CIME.XML.standard_module_setup import * from CIME.SystemTests.restart_tests import RestartTest -from CIME.utils import ls_sorted_by_mtime, safe_copy +from CIME.utils import safe_copy logger = logging.getLogger(__name__) @@ -35,12 +35,11 @@ def _case_two_setup(self): def _case_two_custom_prerun_action(self): dout_s_root = self._case1.get_value("DOUT_S_ROOT") - rest_root = os.path.abspath(os.path.join(dout_s_root, "rest")) - restart_list = ls_sorted_by_mtime(rest_root) - expect(len(restart_list) >= 1, "No restart files found in {}".format(rest_root)) - self._case.restore_from_archive( - rest_dir=os.path.join(rest_root, restart_list[0]) - ) + self._drv_restart_pointer = self._case2.get_value("DRV_RESTART_POINTER") + resttime = self._drv_restart_pointer[-16:] + rest_root = os.path.abspath(os.path.join(dout_s_root, "rest", resttime)) + expect(os.path.isdir(rest_root), "None such directory {}".format(rest_root)) + self._case.restore_from_archive(rest_dir=rest_root) def _case_two_custom_postrun_action(self): # Link back to original case1 name diff --git a/CIME/case/case.py b/CIME/case/case.py index 6f9082b0aa4..5dfafadbe03 100644 --- a/CIME/case/case.py +++ b/CIME/case/case.py @@ -1301,9 +1301,6 @@ def configure( non_local=False, extra_machines_dir=None, case_group=None, - ngpus_per_node=0, - gpu_type=None, - gpu_offload=None, ): expect( check_name(compset_name, additional_chars="."), @@ -1561,64 +1558,6 @@ 
def configure( if test: self.set_value("TEST", True) - # ---------------------------------------------------------------------------------------------------------- - # Sanity check for a GPU run: - # 1. GPU_TYPE and GPU_OFFLOAD must both be defined to use GPUS - # 2. if ngpus_per_node argument is larger than the value of MAX_GPUS_PER_NODE, the NGPUS_PER_NODE - # XML variable in the env_mach_pes.xml file would be set to MAX_GPUS_PER_NODE automatically. - # 3. if ngpus-per-node argument is equal to 0, it will be updated to 1 automatically. - # ---------------------------------------------------------------------------------------------------------- - max_gpus_per_node = self.get_value("MAX_GPUS_PER_NODE") - if gpu_type and str(gpu_type).lower() != "none": - expect( - max_gpus_per_node, - f"GPUS are not defined for machine={machine_name} and compiler={compiler}", - ) - expect( - gpu_offload, - "Both gpu-type and gpu-offload must be defined if either is defined", - ) - expect( - compiler in ["nvhpc", "cray"], - f"Only nvhpc and cray compilers are expected for a GPU run; the user given compiler is {compiler}, ", - ) - valid_gpu_type = self.get_value("GPU_TYPE").split(",") - valid_gpu_type.remove("none") - expect( - gpu_type in valid_gpu_type, - f"Unsupported GPU type is given: {gpu_type} ; valid values are {valid_gpu_type}", - ) - valid_gpu_offload = self.get_value("GPU_OFFLOAD").split(",") - valid_gpu_offload.remove("none") - expect( - gpu_offload in valid_gpu_offload, - f"Unsupported GPU programming model is given: {gpu_offload} ; valid values are {valid_gpu_offload}", - ) - self.gpu_enabled = True - if ngpus_per_node >= 0: - self.set_value( - "NGPUS_PER_NODE", - max(1, ngpus_per_node) - if ngpus_per_node <= max_gpus_per_node - else max_gpus_per_node, - ) - elif gpu_offload and str(gpu_offload).lower() != "none": - expect( - False, - "Both gpu-type and gpu-offload must be defined if either is defined", - ) - elif ngpus_per_node != 0: - expect( - False, - 
f"ngpus_per_node is expected to be 0 for a pure CPU run ; {ngpus_per_node} is provided instead ;", - ) - - # Set these two GPU XML variables here to overwrite the default values - # Only set them for "cesm" model - if self._cime_model == "cesm": - self.set_value("GPU_TYPE", str(gpu_type).lower()) - self.set_value("GPU_OFFLOAD", str(gpu_offload).lower()) - self.initialize_derived_attributes() # -------------------------------------------- @@ -1907,13 +1846,15 @@ def create_caseroot(self, clone=False): component_class in self._component_description and len(self._component_description[component_class]) > 0 ): - append_status( - "Component {} is {}".format( - component_class, self._component_description[component_class] - ), - "README.case", - caseroot=self._caseroot, - ) + if "Stub" not in self._component_description[component_class]: + append_status( + "Component {} is {}".format( + component_class, + self._component_description[component_class], + ), + "README.case", + caseroot=self._caseroot, + ) if component_class == "CPL": append_status( "Using %s coupler instances" % (self.get_value("NINST_CPL")), @@ -1922,12 +1863,13 @@ def create_caseroot(self, clone=False): ) continue comp_grid = "{}_GRID".format(component_class) - - append_status( - "{} is {}".format(comp_grid, self.get_value(comp_grid)), - "README.case", - caseroot=self._caseroot, - ) + grid_val = self.get_value(comp_grid) + if grid_val != "null": + append_status( + "{} is {}".format(comp_grid, grid_val), + "README.case", + caseroot=self._caseroot, + ) comp = str(self.get_value("COMP_{}".format(component_class))) user_mods = self._get_comp_user_mods(comp) if user_mods is not None: @@ -2440,9 +2382,6 @@ def create( non_local=False, extra_machines_dir=None, case_group=None, - ngpus_per_node=0, - gpu_type=None, - gpu_offload=None, ): try: # Set values for env_case.xml @@ -2515,9 +2454,6 @@ def create( non_local=non_local, extra_machines_dir=extra_machines_dir, case_group=case_group, - 
ngpus_per_node=ngpus_per_node, - gpu_type=gpu_type, - gpu_offload=gpu_offload, ) self.create_caseroot() diff --git a/CIME/case/case_st_archive.py b/CIME/case/case_st_archive.py index 1135741228b..5e32ef5b4f9 100644 --- a/CIME/case/case_st_archive.py +++ b/CIME/case/case_st_archive.py @@ -168,60 +168,84 @@ def _archive_rpointer_files( datename_is_last, ): ############################################################################### - if datename_is_last: - # Copy of all rpointer files for latest restart date - rpointers = glob.glob(os.path.join(rundir, "rpointer.*")) - for rpointer in rpointers: - safe_copy( - rpointer, os.path.join(archive_restdir, os.path.basename(rpointer)) - ) + rpointers = glob.glob( + os.path.join(rundir, "rpointer.*" + _datetime_str(datename)) + ) + # If timestamped rpointers exist use them + if rpointers: + for rpointer in rpointers: + safe_copy( + rpointer, os.path.join(archive_restdir, os.path.basename(rpointer)) + ) + else: + # Copy of all rpointer files for latest restart date + rpointers = glob.glob(os.path.join(rundir, "rpointer.*")) + for rpointer in rpointers: + safe_copy( + rpointer, os.path.join(archive_restdir, os.path.basename(rpointer)) + ) else: # Generate rpointer file(s) for interim restarts for the one datename and each # possible value of ninst_strings if save_interim_restart_files: - # parse env_archive.xml to determine the rpointer files - # and contents for the given archive_entry tag - rpointer_items = archive.get_rpointer_contents(archive_entry) - - # loop through the possible rpointer files and contents - for rpointer_file, rpointer_content in rpointer_items: - temp_rpointer_file = rpointer_file - temp_rpointer_content = rpointer_content - - # put in a temporary setting for ninst_strings if they are empty - # in order to have just one loop over ninst_strings below - if rpointer_content != "unset": - if not ninst_strings: - ninst_strings = ["empty"] - - for ninst_string in ninst_strings: - rpointer_file = 
temp_rpointer_file - rpointer_content = temp_rpointer_content - if ninst_string == "empty": - ninst_string = "" - for key, value in [ - ("$CASE", casename), - ("$DATENAME", _datetime_str(datename)), - ("$MPAS_DATENAME", _datetime_str_mpas(datename)), - ("$NINST_STRING", ninst_string), - ]: - rpointer_file = rpointer_file.replace(key, value) - rpointer_content = rpointer_content.replace(key, value) - - # write out the respective files with the correct contents - rpointer_file = os.path.join(archive_restdir, rpointer_file) - logger.info("writing rpointer_file {}".format(rpointer_file)) - f = open(rpointer_file, "w") - for output in rpointer_content.split(","): - f.write("{} \n".format(output)) - f.close() - else: - logger.info( - "rpointer_content unset, not creating rpointer file {}".format( - rpointer_file - ) + rpointers = glob.glob( + os.path.join(rundir, "rpointer.*" + _datetime_str(datename)) + ) + # If timestamped rpointers exist use them + if rpointers: + for rpointer in rpointers: + safe_copy( + rpointer, + os.path.join(archive_restdir, os.path.basename(rpointer)), ) + else: + # parse env_archive.xml to determine the rpointer files + # and contents for the given archive_entry tag + rpointer_items = archive.get_rpointer_contents(archive_entry) + + # loop through the possible rpointer files and contents + for rpointer_file, rpointer_content in rpointer_items: + temp_rpointer_file = rpointer_file + temp_rpointer_content = rpointer_content + + # put in a temporary setting for ninst_strings if they are empty + # in order to have just one loop over ninst_strings below + if rpointer_content != "unset": + if not ninst_strings: + ninst_strings = ["empty"] + + for ninst_string in ninst_strings: + rpointer_file = temp_rpointer_file + rpointer_content = temp_rpointer_content + if ninst_string == "empty": + ninst_string = "" + for key, value in [ + ("$CASE", casename), + ("$DATENAME", _datetime_str(datename)), + ("$MPAS_DATENAME", _datetime_str_mpas(datename)), + 
("$NINST_STRING", ninst_string), + ]: + rpointer_file = rpointer_file.replace(key, value) + rpointer_content = rpointer_content.replace(key, value) + + # write out the respective files with the correct contents + rpointer_file = os.path.join( + archive_restdir, rpointer_file + ) + logger.info( + "writing rpointer_file {}".format(rpointer_file) + ) + f = open(rpointer_file, "w") + for output in rpointer_content.split(","): + f.write("{} \n".format(output)) + f.close() + else: + logger.info( + "rpointer_content unset, not creating rpointer file {}".format( + rpointer_file + ) + ) ############################################################################### @@ -501,9 +525,10 @@ def _archive_restarts_date_comp( """ datename_str = _datetime_str(datename) - if datename_is_last or case.get_value("DOUT_S_SAVE_INTERIM_RESTART_FILES"): - if not os.path.exists(archive_restdir): - os.makedirs(archive_restdir) + if ( + datename_is_last or case.get_value("DOUT_S_SAVE_INTERIM_RESTART_FILES") + ) and not os.path.isdir(archive_restdir): + os.makedirs(archive_restdir) # archive the rpointer file(s) for this datename and all possible ninst_strings _archive_rpointer_files(