diff --git a/src/tests/ftest/aggregation/dfuse_space_check.py b/src/tests/ftest/aggregation/dfuse_space_check.py index f95025328f3..1d119f807a7 100644 --- a/src/tests/ftest/aggregation/dfuse_space_check.py +++ b/src/tests/ftest/aggregation/dfuse_space_check.py @@ -7,6 +7,7 @@ import os import time +from dfuse_utils import get_dfuse, start_dfuse from ior_test_base import IorTestBase @@ -57,16 +58,19 @@ def wait_for_aggregation(self, retries=4, interval=60): self.log.info("Free space when test terminated: %s", current_space) self.fail("Aggregation did not complete within {} seconds".format(retries * interval)) - def write_multiple_files(self): + def write_multiple_files(self, dfuse): """Write multiple files. + Args: + dfuse (Dfuse): the dfuse object + Returns: int: Total number of files created before going out of space. """ file_count = 0 while self.get_nvme_free_space(False) >= self.block_size: - file_path = os.path.join(self.dfuse.mount_dir.value, "file{}.txt".format(file_count)) + file_path = os.path.join(dfuse.mount_dir.value, "file{}.txt".format(file_count)) write_dd_cmd = "dd if=/dev/zero of={} bs={} count=1".format(file_path, self.block_size) if 0 in self.execute_cmd(write_dd_cmd, fail_on_err=True, display_output=False): file_count += 1 @@ -106,13 +110,14 @@ def test_dfusespacecheck(self): # Create a pool, container, and start dfuse self.create_pool() self.create_cont() - self.start_dfuse(self.hostlist_clients, self.pool, self.container) + dfuse = get_dfuse(self, self.hostlist_clients) + start_dfuse(self, dfuse, self.pool, self.container) # get nvme space before write self.initial_space = self.get_nvme_free_space() # Create a file as large as we can - large_file = os.path.join(self.dfuse.mount_dir.value, 'largefile.txt') + large_file = os.path.join(dfuse.mount_dir.value, 'largefile.txt') self.execute_cmd('touch {}'.format(large_file)) dd_count = (self.initial_space // self.block_size) + 1 write_dd_cmd = "dd if=/dev/zero of={} bs={} count={}".format( @@ -130,14 +135,14 @@ def test_dfusespacecheck(self): self.pool.set_property("reclaim", "disabled") # Write small files until we run out of space - file_count1 = self.write_multiple_files() + file_count1 = self.write_multiple_files(dfuse) # Enable aggregation self.log.info("Enabling aggregation") self.pool.set_property("reclaim", "time") # remove all the small files created above. - self.execute_cmd("rm -rf {}".format(os.path.join(self.dfuse.mount_dir.value, '*'))) + self.execute_cmd("rm -rf {}".format(os.path.join(dfuse.mount_dir.value, '*'))) # Wait for aggregation to complete after file removal self.wait_for_aggregation() @@ -147,7 +152,7 @@ def test_dfusespacecheck(self): self.pool.set_property("reclaim", "disabled") # Write small files again until we run out of space and verify we wrote the same amount - file_count2 = self.write_multiple_files() + file_count2 = self.write_multiple_files(dfuse) self.log.info('file_count1 = %s', file_count1) self.log.info('file_count2 = %s', file_count2) diff --git a/src/tests/ftest/daos_test/dfuse.py b/src/tests/ftest/daos_test/dfuse.py index 5c048115564..205d1cd11a6 100644 --- a/src/tests/ftest/daos_test/dfuse.py +++ b/src/tests/ftest/daos_test/dfuse.py @@ -7,13 +7,14 @@ import os from collections import OrderedDict +from apricot import TestWithServers from cmocka_utils import CmockaUtils -from dfuse_test_base import DfuseTestBase +from dfuse_utils import get_dfuse, start_dfuse from general_utils import create_directory, get_log_file from job_manager_utils import get_job_manager -class DaosCoreTestDfuse(DfuseTestBase): +class DaosCoreTestDfuse(TestWithServers): """Runs DAOS DFuse tests. :avocado: recursive @@ -29,11 +30,11 @@ def run_test(self, il_lib=None): if il_lib is None: self.fail('il_lib is not defined.') - self.daos_test = os.path.join(self.bin, 'dfuse_test') + daos_test = os.path.join(self.bin, 'dfuse_test') # Create a pool, container and start dfuse. - self.add_pool(connect=False) - self.add_container(self.pool) + pool = self.get_pool(connect=False) + container = self.get_container(pool) cont_attrs = OrderedDict() @@ -67,14 +68,15 @@ def run_test(self, il_lib=None): elif cache_mode == 'native': use_dfuse = False else: - self.fail('Invalid cache_mode: {}'.format(cache_mode)) + self.fail(f'Invalid cache_mode: {cache_mode}') if use_dfuse: - self.container.set_attr(attrs=cont_attrs) + container.set_attr(attrs=cont_attrs) - self.start_dfuse(self.hostlist_clients, self.pool, self.container) + dfuse = get_dfuse(self, self.hostlist_clients) + start_dfuse(self, dfuse, pool, container) - mount_dir = self.dfuse.mount_dir.value + mount_dir = dfuse.mount_dir.value else: # Bypass, simply create a remote directory and use that. mount_dir = '/tmp/dfuse-test' @@ -94,14 +96,14 @@ def run_test(self, il_lib=None): if il_lib == 'libpil4dfs.so': daos_test_env['D_IL_MOUNT_POINT'] = mount_dir - daos_test_env['D_IL_POOL'] = self.pool.identifier - daos_test_env['D_IL_CONTAINER'] = self.container.identifier + daos_test_env['D_IL_POOL'] = pool.identifier + daos_test_env['D_IL_CONTAINER'] = container.identifier daos_test_env['D_IL_REPORT'] = '0' daos_test_env['D_IL_MAX_EQ'] = '2' daos_test_env['D_IL_ENFORCE_EXEC_ENV'] = '1' command = [ - self.daos_test, + daos_test, '--test-dir', mount_dir, '--io', diff --git a/src/tests/ftest/datamover/negative.py b/src/tests/ftest/datamover/negative.py index aec63776a7e..3b05e1c8dfd 100644 --- a/src/tests/ftest/datamover/negative.py +++ b/src/tests/ftest/datamover/negative.py @@ -8,6 +8,7 @@ from os.path import join from data_mover_test_base import DataMoverTestBase +from dfuse_utils import get_dfuse, start_dfuse from duns_utils import format_path @@ -60,7 +61,8 @@ def test_dm_bad_params_dcp(self): self.set_tool("DCP") # Start dfuse to hold all pools/containers - self.start_dfuse(self.dfuse_hosts) + dfuse = get_dfuse(self, self.dfuse_hosts) + start_dfuse(self, dfuse) # Create a test pool pool1 = self.create_pool() @@ -69,7 +71,7 @@ def test_dm_bad_params_dcp(self): uns_cont = self.get_container(pool1) # Create a test container - cont1_path = join(self.dfuse.mount_dir.value, pool1.uuid, uns_cont.uuid, 'uns1') + cont1_path = join(dfuse.mount_dir.value, pool1.uuid, uns_cont.uuid, 'uns1') cont1 = self.get_container(pool1, path=cont1_path) # Create test files @@ -209,7 +211,8 @@ def test_dm_bad_params_fs_copy(self): self.set_tool("FS_COPY") # Start dfuse to hold all pools/containers - self.start_dfuse(self.dfuse_hosts) + dfuse = get_dfuse(self, self.dfuse_hosts) + start_dfuse(self, dfuse) # Create a test pool pool1 = self.create_pool() @@ -218,7 +221,7 @@ def test_dm_bad_params_fs_copy(self): uns_cont = self.get_container(pool1) # Create a test container - cont1_path = join(self.dfuse.mount_dir.value, pool1.uuid, uns_cont.uuid, 'uns1') + cont1_path = join(dfuse.mount_dir.value, pool1.uuid, uns_cont.uuid, 'uns1') cont1 = self.get_container(pool1, path=cont1_path) # Create test files diff --git a/src/tests/ftest/datamover/posix_meta_entry.py b/src/tests/ftest/datamover/posix_meta_entry.py index 40268fc7485..bb608c27853 100644 --- a/src/tests/ftest/datamover/posix_meta_entry.py +++ b/src/tests/ftest/datamover/posix_meta_entry.py @@ -6,6 +6,7 @@ from os.path import join from data_mover_test_base import DataMoverTestBase +from dfuse_utils import get_dfuse, start_dfuse from exception_utils import CommandFailure @@ -62,7 +63,8 @@ def run_dm_posix_meta_entry(self, tool): test_desc = self.test_id + " (preserve={})".format(str(preserve_on)) # Start dfuse to hold all pools/containers - self.start_dfuse(self.dfuse_hosts) + dfuse = get_dfuse(self, self.dfuse_hosts) + start_dfuse(self, dfuse) # Create 1 pool pool1 = self.create_pool() @@ -71,7 +73,7 @@ def run_dm_posix_meta_entry(self, tool): cont1 = self.get_container(pool1) daos_src_path = self.new_daos_test_path(False) dfuse_src_path = "{}/{}/{}{}".format( - self.dfuse.mount_dir.value, pool1.uuid, cont1.uuid, daos_src_path) + dfuse.mount_dir.value, pool1.uuid, cont1.uuid, daos_src_path) self.create_data(dfuse_src_path) # Create 1 source posix path with test data @@ -84,7 +86,7 @@ def run_dm_posix_meta_entry(self, tool): # DAOS -> DAOS daos_dst_path = self.new_daos_test_path(False) dfuse_dst_path = "{}/{}/{}{}".format( - self.dfuse.mount_dir.value, pool1.uuid, cont1.uuid, daos_dst_path) + dfuse.mount_dir.value, pool1.uuid, cont1.uuid, daos_dst_path) self.run_datamover( test_desc + "(DAOS->DAOS)", "DAOS", daos_src_path, pool1, cont1, @@ -105,7 +107,7 @@ def run_dm_posix_meta_entry(self, tool): # POSIX -> DAOS daos_dst_path = self.new_daos_test_path(False) dfuse_dst_path = "{}/{}/{}{}".format( - self.dfuse.mount_dir.value, pool1.uuid, cont1.uuid, daos_dst_path) + dfuse.mount_dir.value, pool1.uuid, cont1.uuid, daos_dst_path) self.run_datamover( test_desc + "(POSIX->DAOS)", "POSIX", posix_src_path, None, None, diff --git a/src/tests/ftest/datamover/posix_subsets.py b/src/tests/ftest/datamover/posix_subsets.py index bdce75a0ed8..45e33d9cec9 100644 --- a/src/tests/ftest/datamover/posix_subsets.py +++ b/src/tests/ftest/datamover/posix_subsets.py @@ -7,6 +7,7 @@ from os.path import join from data_mover_test_base import DataMoverTestBase +from dfuse_utils import get_dfuse, start_dfuse class DmvrPosixSubsets(DataMoverTestBase): @@ -49,7 +50,8 @@ def run_dm_posix_subsets(self, tool): self.set_tool(tool) # Start dfuse to hold all pools/containers - self.start_dfuse(self.dfuse_hosts) + dfuse = get_dfuse(self, self.dfuse_hosts) + start_dfuse(self, dfuse) # Create 1 pool pool1 = self.create_pool() @@ -57,13 +59,12 @@ def run_dm_posix_subsets(self, tool): # create dfuse containers to test copying to dfuse subdirectories dfuse_cont1 = self.get_container(pool1) dfuse_cont2 = self.get_container(pool1) - dfuse_src_dir = join(self.dfuse.mount_dir.value, pool1.uuid, dfuse_cont1.uuid) + dfuse_src_dir = join(dfuse.mount_dir.value, pool1.uuid, dfuse_cont1.uuid) # Create a special container to hold UNS entries uns_cont = self.get_container(pool1) # Create two test containers - container1_path = join( - self.dfuse.mount_dir.value, pool1.uuid, uns_cont.uuid, 'uns1') + container1_path = join(dfuse.mount_dir.value, pool1.uuid, uns_cont.uuid, 'uns1') container1 = self.get_container(pool1, path=container1_path) container2 = self.get_container(pool1) @@ -128,7 +129,7 @@ def run_dm_posix_subsets(self, tool): dfuse_dst_dir = self.new_posix_test_path( create=False, - parent=join(self.dfuse.mount_dir.value, pool1.uuid, dfuse_cont2.uuid)) + parent=join(dfuse.mount_dir.value, pool1.uuid, dfuse_cont2.uuid)) copy_list.append([ "copy_subsets (dfuse root to new dfuse dir)", ["POSIX", dfuse_src_dir, None, None], diff --git a/src/tests/ftest/datamover/posix_symlinks.py b/src/tests/ftest/datamover/posix_symlinks.py index cdffdd9085e..68d60e4c973 100644 --- a/src/tests/ftest/datamover/posix_symlinks.py +++ b/src/tests/ftest/datamover/posix_symlinks.py @@ -6,6 +6,7 @@ from os.path import join from data_mover_test_base import DataMoverTestBase +from dfuse_utils import get_dfuse, start_dfuse class DmvrPosixSymlinks(DataMoverTestBase): @@ -55,7 +56,8 @@ def run_dm_posix_symlinks(self, tool): self.set_tool(tool) # Start dfuse to hold all pools/containers - self.start_dfuse(self.dfuse_hosts) + dfuse = get_dfuse(self, self.dfuse_hosts) + start_dfuse(self, dfuse) # Create 1 pool pool1 = self.create_pool() @@ -64,19 +66,19 @@ def run_dm_posix_symlinks(self, tool): uns_cont = self.get_container(pool1) # Test links that point forward - container1_path = join(self.dfuse.mount_dir.value, pool1.uuid, uns_cont.uuid, 'uns1') + container1_path = join(dfuse.mount_dir.value, pool1.uuid, uns_cont.uuid, 'uns1') container1 = self.get_container(pool1, path=container1_path) self.run_dm_posix_symlinks_fun( pool1, container1, self.create_links_forward, "forward") # Test links that point backward - container2_path = join(self.dfuse.mount_dir.value, pool1.uuid, uns_cont.uuid, 'uns2') + container2_path = join(dfuse.mount_dir.value, pool1.uuid, uns_cont.uuid, 'uns2') container2 = self.get_container(pool1, path=container2_path) self.run_dm_posix_symlinks_fun( pool1, container2, self.create_links_backward, "backward") # Test a mix of forward and backward links - container3_path = join(self.dfuse.mount_dir.value, pool1.uuid, uns_cont.uuid, 'uns3') + container3_path = join(dfuse.mount_dir.value, pool1.uuid, uns_cont.uuid, 'uns3') container3 = self.get_container(pool1, path=container3_path) self.run_dm_posix_symlinks_fun( pool1, container3, self.create_links_mixed, "mixed") diff --git a/src/tests/ftest/datamover/posix_types.py b/src/tests/ftest/datamover/posix_types.py index 44321355588..0ef85d018a8 100644 --- a/src/tests/ftest/datamover/posix_types.py +++ b/src/tests/ftest/datamover/posix_types.py @@ -6,6 +6,7 @@ from os.path import basename, join from data_mover_test_base import DataMoverTestBase +from dfuse_utils import get_dfuse, start_dfuse from duns_utils import format_path, parse_path @@ -63,7 +64,8 @@ def run_dm_posix_types(self, tool): self.set_tool(tool) # Start dfuse to hold all pools/containers - self.start_dfuse(self.dfuse_hosts) + dfuse = get_dfuse(self, self.dfuse_hosts) + start_dfuse(self, dfuse) # Create 2 pools pool1 = self.create_pool(label='pool1') @@ -73,11 +75,11 @@ def run_dm_posix_types(self, tool): uns_cont = self.get_container(pool1) # Create all other containers - container1_path = join(self.dfuse.mount_dir.value, pool1.uuid, uns_cont.uuid, 'uns1') + container1_path = join(dfuse.mount_dir.value, pool1.uuid, uns_cont.uuid, 'uns1') container1 = self.get_container(pool1, path=container1_path, label='container1') - container2_path = join(self.dfuse.mount_dir.value, pool1.uuid, uns_cont.uuid, 'uns2') + container2_path = join(dfuse.mount_dir.value, pool1.uuid, uns_cont.uuid, 'uns2') container2 = self.get_container(pool1, path=container2_path, label='container2') - container3_path = join(self.dfuse.mount_dir.value, pool1.uuid, uns_cont.uuid, 'uns3') + container3_path = join(dfuse.mount_dir.value, pool1.uuid, uns_cont.uuid, 'uns3') container3 = self.get_container(pool2, path=container3_path, label='container3') # Create each source location diff --git a/src/tests/ftest/datamover/serial_large_posix.py b/src/tests/ftest/datamover/serial_large_posix.py index f2d7c336fea..6917097d901 100644 --- a/src/tests/ftest/datamover/serial_large_posix.py +++ b/src/tests/ftest/datamover/serial_large_posix.py @@ -4,6 +4,7 @@ SPDX-License-Identifier: BSD-2-Clause-Patent ''' from data_mover_test_base import DataMoverTestBase +from dfuse_utils import get_dfuse, start_dfuse from duns_utils import format_path @@ -54,8 +55,9 @@ def run_dm_serial_large_posix(self, tool): # Use dfuse as a shared intermediate for serialize + deserialize dfuse_cont = self.get_container(pool1) - self.start_dfuse(self.dfuse_hosts, pool1, dfuse_cont) - self.serial_tmp_dir = self.dfuse.mount_dir.value + dfuse = get_dfuse(self, self.dfuse_hosts) + start_dfuse(self, dfuse, pool1, dfuse_cont) + self.serial_tmp_dir = dfuse.mount_dir.value # Serialize/Deserialize cont1 to a new cont2 in pool2 result = self.run_datamover( diff --git a/src/tests/ftest/dbench/dbench.py b/src/tests/ftest/dbench/dbench.py index 05081fb4039..fbbb60f3961 100644 --- a/src/tests/ftest/dbench/dbench.py +++ b/src/tests/ftest/dbench/dbench.py @@ -4,13 +4,14 @@ SPDX-License-Identifier: BSD-2-Clause-Patent """ +from apricot import TestWithServers from ClusterShell.NodeSet import NodeSet from dbench_utils import Dbench -from dfuse_test_base import DfuseTestBase +from dfuse_utils import get_dfuse, start_dfuse from exception_utils import CommandFailure -class DbenchTest(DfuseTestBase): +class DbenchTest(TestWithServers): # pylint: disable=too-few-public-methods """Base Dbench test class. @@ -36,14 +37,18 @@ def test_dbench(self): :avocado: tags=DbenchTest,test_dbench """ - self.add_pool(connect=False) - self.add_container(self.pool) - self.start_dfuse(self.hostlist_clients, self.pool, self.container) + self.log_step('Creating a single pool and container') + pool = self.get_pool(connect=False) + container = self.get_container(pool) + self.log_step('Starting dfuse') + dfuse = get_dfuse(self, self.hostlist_clients) + start_dfuse(self, dfuse, pool, container) + + self.log_step('Running dbench') dbench_cmd = Dbench(self.hostlist_clients, self.tmp) dbench_cmd.get_params(self) - dbench_cmd.directory.update(self.dfuse.mount_dir.value) - + dbench_cmd.directory.update(dfuse.mount_dir.value) try: # Start dfuse dbench_cmd.run() @@ -53,9 +58,4 @@ def test_dbench(self): str(NodeSet.fromlist(dbench_cmd.hosts)), exc_info=error) self.fail("Test was expected to pass but it failed.") - # stop dfuse - self.stop_dfuse() - # destroy container - self.container.destroy() - # destroy pool - self.pool.destroy() + self.log.info('Test passed') diff --git a/src/tests/ftest/dfuse/bash.py b/src/tests/ftest/dfuse/bash.py index 720b2d12feb..8c0b05f01af 100644 --- a/src/tests/ftest/dfuse/bash.py +++ b/src/tests/ftest/dfuse/bash.py @@ -5,11 +5,13 @@ """ import os -from dfuse_test_base import DfuseTestBase +from apricot import TestWithServers +from dfuse_utils import get_dfuse, start_dfuse +from host_utils import get_local_host from run_utils import run_remote -class Cmd(DfuseTestBase): +class DfuseBashCmd(TestWithServers): """Base Cmd test class. :avocado: recursive @@ -54,29 +56,31 @@ def run_bashcmd(self, il_lib=None, compatible_mode=False): env_str = "" # Create a pool if one does not already exist. - self.add_pool(connect=False) - self.add_container(self.pool) - mount_dir = f"/tmp/{self.pool.identifier}_daos_dfuse" - self.start_dfuse(self.hostlist_clients, self.pool, self.container, mount_dir=mount_dir) + self.log_step('Creating a single pool and container') + pool = self.get_pool(connect=False) + container = self.get_container(pool) + + self.log_step('Starting dfuse') + dfuse_hosts = get_local_host() + dfuse = get_dfuse(self, dfuse_hosts) + params = {'mount_dir': f'/tmp/{pool.identifier}_daos_dfuse'} if il_lib is not None: - # unmount dfuse and mount again with caching disabled - self.dfuse.unmount(tries=1) - self.dfuse.update_params(disable_caching=True) - self.dfuse.update_params(disable_wb_cache=True) - self.dfuse.run() + params['disable_caching'] = True + params['disable_wb_cache'] = True + start_dfuse(self, dfuse, pool, container, **params) - fuse_root_dir = self.dfuse.mount_dir.value + fuse_root_dir = dfuse.mount_dir.value abs_dir_path = os.path.join(fuse_root_dir, "test") abs_file_path1 = os.path.join(abs_dir_path, "testfile1.txt") abs_file_path2 = os.path.join(abs_dir_path, "testfile2.txt") - with open(os.path.join(fuse_root_dir, "src.c"), "w") as fd: + with open(os.path.join(fuse_root_dir, "src.c"), "w", encoding="utf-8") as fd: fd.write('#include \n\nint main(void) {\nprintf("Hello World!");\n}\n') link_name = os.path.join(fuse_root_dir, "link_c") - with open(os.path.join(fuse_root_dir, "src_a.c"), "w") as fd: + with open(os.path.join(fuse_root_dir, "src_a.c"), "w", encoding="utf-8") as fd: fd.write('#include \n\nvoid fun_a(void) {\nprintf("fun_a()");\n}\n') - with open(os.path.join(fuse_root_dir, "src_b.c"), "w") as fd: + with open(os.path.join(fuse_root_dir, "src_b.c"), "w", encoding="utf-8") as fd: fd.write('#include \n\nvoid fun_b(void) {\nprintf("fun_b()");\n}\n') # list of commands to be executed. commands = [ @@ -129,16 +133,11 @@ def run_bashcmd(self, il_lib=None, compatible_mode=False): f'curl "https://www.google.com" -o {fuse_root_dir}/download.html', ] for cmd in commands: - result = run_remote(self.log, self.hostlist_clients, env_str + cmd) + self.log_step(f'Running command: {cmd}') + result = run_remote(self.log, dfuse_hosts, env_str + cmd) if not result.passed: self.fail(f'"{cmd}" failed on {result.failed_hosts}') - - # stop dfuse - self.stop_dfuse() - # destroy container - self.container.destroy() - # destroy pool - self.pool.destroy() + self.log.info('Test passed') def test_bashcmd(self): """ @@ -151,7 +150,7 @@ def test_bashcmd(self): :avocado: tags=all,daily_regression :avocado: tags=vm :avocado: tags=dfuse,dfs - :avocado: tags=Cmd,test_bashcmd + :avocado: tags=DfuseBashCmd,test_bashcmd """ self.run_bashcmd() @@ -165,8 +164,8 @@ def test_bashcmd_ioil(self): :avocado: tags=all,pr,daily_regression :avocado: tags=vm - :avocado: tags=dfuse,il,dfs - :avocado: tags=Cmd,test_bashcmd_ioil + :avocado: tags=dfuse,dfs,ioil + :avocado: tags=DfuseBashCmd,test_bashcmd_ioil """ self.run_bashcmd(il_lib="libioil.so") @@ -180,7 +179,7 @@ def test_bashcmd_pil4dfs(self): :avocado: tags=all,daily_regression :avocado: tags=vm - :avocado: tags=dfuse,pil4dfs,dfs - :avocado: tags=Cmd,test_bashcmd_pil4dfs + :avocado: tags=dfuse,dfs,pil4dfs + :avocado: tags=DfuseBashCmd,test_bashcmd_pil4dfs """ self.run_bashcmd(il_lib="libpil4dfs.so") diff --git a/src/tests/ftest/dfuse/bash_dcache.py b/src/tests/ftest/dfuse/bash_dcache.py index 55e83bbcd63..306228da283 100644 --- a/src/tests/ftest/dfuse/bash_dcache.py +++ b/src/tests/ftest/dfuse/bash_dcache.py @@ -7,7 +7,9 @@ import os import stat -from dfuse_test_base import DfuseTestBase +from apricot import TestWithServers +from dfuse_utils import get_dfuse, start_dfuse +from host_utils import get_local_host from run_utils import run_remote SCRIPT = """#!/bin/bash @@ -33,7 +35,7 @@ """ -class DFuseBashdcacheTest(DfuseTestBase): +class DFuseBashdcacheTest(TestWithServers): # pylint: disable=wrong-spelling-in-docstring """Base "Bashdcache" test class. @@ -58,18 +60,19 @@ def test_bash_dcache_pil4dfs(self): pool = self.get_pool(connect=False) container = self.get_container(pool) - self.start_dfuse(self.hostlist_clients, pool, container) + dfuse_hosts = get_local_host() + dfuse = get_dfuse(self, dfuse_hosts) + start_dfuse(self, dfuse, pool, container) + fuse_root_dir = dfuse.mount_dir.value - fuse_root_dir = self.dfuse.mount_dir.value - - with open(os.path.join(fuse_root_dir, "sh_dcache.sh"), "w") as fd: + with open(os.path.join(fuse_root_dir, "sh_dcache.sh"), "w", encoding="utf-8") as fd: fd.write(SCRIPT) os.chmod(os.path.join(fuse_root_dir, "sh_dcache.sh"), stat.S_IXUSR | stat.S_IRUSR) cmd = f"cd {fuse_root_dir}; ./sh_dcache.sh" - result = run_remote(self.log, self.hostlist_clients, env_str + cmd) + result = run_remote(self.log, dfuse_hosts, env_str + cmd) if not result.passed: self.fail(f'"{cmd}" failed on {result.failed_hosts}') if result.output[0].stdout[0][:5] != "Hello": @@ -78,6 +81,6 @@ def test_bash_dcache_pil4dfs(self): # Turn on directory caching in bash env_str = env_str + "export D_IL_NO_DCACHE_BASH=0; " - result = run_remote(self.log, self.hostlist_clients, env_str + cmd) + result = run_remote(self.log, dfuse_hosts, env_str + cmd) if result.passed: self.fail(f'"{cmd}" failed on {result.failed_hosts}') diff --git a/src/tests/ftest/dfuse/bash_fd.py b/src/tests/ftest/dfuse/bash_fd.py index a8a39d61a51..925e96e8a9c 100644 --- a/src/tests/ftest/dfuse/bash_fd.py +++ b/src/tests/ftest/dfuse/bash_fd.py @@ -7,7 +7,9 @@ import os import stat -from dfuse_test_base import DfuseTestBase +from apricot import TestWithServers +from dfuse_utils import get_dfuse, start_dfuse +from host_utils import get_local_host from run_utils import run_remote OUTER = """#!/bin/bash @@ -77,7 +79,7 @@ """ -class DFuseFdTest(DfuseTestBase): +class DFuseFdTest(TestWithServers): """Base FdTest test class. :avocado: recursive @@ -98,28 +100,35 @@ def run_bashfd(self, il_lib=None): else: env_str = "" + self.log_step('Creating a single pool and container') pool = self.get_pool(connect=False) container = self.get_container(pool) - self.start_dfuse(self.hostlist_clients, pool, container) - fuse_root_dir = self.dfuse.mount_dir.value + self.log_step('Starting dfuse') + dfuse_hosts = get_local_host() + dfuse = get_dfuse(self, dfuse_hosts) + start_dfuse(self, dfuse, pool, container) + fuse_root_dir = dfuse.mount_dir.value - with open(os.path.join(fuse_root_dir, "bash_fd_inner.sh"), "w") as fd: + self.log_step("Setting up the 'bash_fd_inner.sh' script") + with open(os.path.join(fuse_root_dir, "bash_fd_inner.sh"), "w", encoding="utf-8") as fd: fd.write(INNER) - os.chmod(os.path.join(fuse_root_dir, "bash_fd_inner.sh"), stat.S_IXUSR | stat.S_IRUSR) - with open(os.path.join(fuse_root_dir, "bash_fd_outer.sh"), "w") as fd: + self.log_step("Setting up the 'bash_fd_outer.sh' script") + with open(os.path.join(fuse_root_dir, "bash_fd_outer.sh"), "w", encoding="utf-8") as fd: fd.write(OUTER) - os.chmod(os.path.join(fuse_root_dir, "bash_fd_outer.sh"), stat.S_IXUSR | stat.S_IRUSR) cmd = f"cd {fuse_root_dir}; ./bash_fd_outer.sh" - result = run_remote(self.log, self.hostlist_clients, env_str + cmd) + self.log_step("Executing the 'bash_fd_outer.sh' script") + result = run_remote(self.log, dfuse_hosts, env_str + cmd) if not result.passed: self.fail(f'"{cmd}" failed on {result.failed_hosts}') + self.log.info('Test passed') + def test_bashfd(self): """ @@ -141,7 +150,7 @@ def test_bashfd_ioil(self): :avocado: tags=all,full_regression :avocado: tags=vm - :avocado: tags=dfuse,il,dfs + :avocado: tags=dfuse,dfs,ioil :avocado: tags=DFuseFdTest,test_bashfd_ioil """ self.run_bashfd(il_lib="libioil.so") @@ -154,7 +163,7 @@ def test_bashfd_pil4dfs(self): :avocado: tags=all,full_regression :avocado: tags=vm - :avocado: tags=pil4dfs,dfs + :avocado: tags=dfuse,dfs,pil4dfs :avocado: tags=DFuseFdTest,test_bashfd_pil4dfs """ self.run_bashfd(il_lib="libpil4dfs.so") diff --git a/src/tests/ftest/dfuse/container_type.py b/src/tests/ftest/dfuse/container_type.py index 592bce1cb6e..b14a3d7c4b6 100644 --- a/src/tests/ftest/dfuse/container_type.py +++ b/src/tests/ftest/dfuse/container_type.py @@ -1,13 +1,15 @@ """ - (C) Copyright 2020-2023 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent """ + +from apricot import TestWithServers from avocado.core.exceptions import TestFail -from dfuse_test_base import DfuseTestBase +from dfuse_utils import get_dfuse, start_dfuse -class DfuseContainerCheck(DfuseTestBase): +class DfuseContainerCheck(TestWithServers): """Base Dfuse Container check test class. :avocado: recursive @@ -31,40 +33,46 @@ def test_dfuse_container_check(self): :avocado: tags=DfuseContainerCheck,test_dfuse_container_check """ # get test params for cont and pool count - cont_types = self.params.get("cont_types", '/run/container/*') + cont_types = self.params.get('cont_types', '/run/container/*') # Create a pool and start dfuse. - self.add_pool(connect=False) + self.log_step('Creating a single pool') + pool = self.get_pool(connect=False) for cont_type in cont_types: + description = f"{cont_type if cont_type == 'POSIX' else 'non-POSIX'}" # Get container params - self.add_container(self.pool, create=False) + self.log_step(f'Creating a {description} container') + container = self.get_container(pool, create=False) # create container - if cont_type == "POSIX": - self.container.type.update(cont_type) - self.container.create() + if cont_type == 'POSIX': + container.type.update(cont_type) + container.create() # Attempt to mount the dfuse mount point - this should only succeed # with a POSIX container + self.log_step(f'Attempting to mount dfuse with a {description} container') + dfuse = get_dfuse(self, self.hostlist_clients) try: - self.start_dfuse( - self.hostlist_clients, self.pool, self.container) - if cont_type != "POSIX": - self.fail("Non-POSIX type container mounted over dfuse") + start_dfuse(self, dfuse, pool, container) + if cont_type != 'POSIX': + self.fail(f'Dfuse mount succeeded with a {description} container') except TestFail as error: - if cont_type == "POSIX": - self.fail( - "POSIX type container failed dfuse mount: {}".format( - error)) - self.log.info( - "Non-POSIX type container expected to fail dfuse mount") + if cont_type == 'POSIX': + self.log.debug('%s', error) + self.fail(f'Dfuse failed to mount with a {description} container') + self.log.info('Dfuse mount expected to fail with a %s container', description) + + # Verify dfuse is running on the POSIX type container, then stop dfuse + if cont_type == 'POSIX': + self.log_step(f'Verifying dfuse is running with a {description} container') + dfuse.check_running() + self.log_step(f'Stopping dfuse with a {description} container') + dfuse.stop() - # Verify dfuse is running on the POSIX type container - if cont_type == "POSIX": - self.dfuse.check_running() + # Destroy the container for next iteration + self.log_step(f'Destroying a {description} container') + container.destroy(1) - # Stop dfuse and destroy the container for next iteration - if not cont_type == "": - self.stop_dfuse() - self.container.destroy(1) + self.log.info('Test passed') diff --git a/src/tests/ftest/dfuse/daos_build.py b/src/tests/ftest/dfuse/daos_build.py index 29d6b27bee3..03ab81cba1f 100644 --- a/src/tests/ftest/dfuse/daos_build.py +++ b/src/tests/ftest/dfuse/daos_build.py @@ -7,11 +7,12 @@ import os import time -from dfuse_test_base import DfuseTestBase +from apricot import TestWithServers +from dfuse_utils import get_dfuse, start_dfuse from run_utils import run_remote -class DaosBuild(DfuseTestBase): +class DaosBuild(TestWithServers): """Build DAOS over dfuse. :avocado: recursive @@ -132,8 +133,9 @@ def test_dfuse_daos_build_nocache(self): def run_build_test(self, cache_mode, il_lib=None, run_on_vms=False): """Run an actual test from above.""" # Create a pool, container and start dfuse. - self.add_pool(connect=False) - self.add_container(self.pool) + self.log_step('Creating a single pool and container') + pool = self.get_pool(connect=False) + container = self.get_container(pool) cont_attrs = {} @@ -165,7 +167,7 @@ def run_build_test(self, cache_mode, il_lib=None, run_on_vms=False): build_jobs = 5 * 2 remote_env['D_IL_MAX_EQ'] = '0' - self.load_dfuse(self.hostlist_clients, dfuse_namespace) + dfuse = get_dfuse(self, self.hostlist_clients, dfuse_namespace) if cache_mode == 'writeback': cont_attrs['dfuse-data-cache'] = '1m' @@ -179,39 +181,39 @@ def run_build_test(self, cache_mode, il_lib=None, run_on_vms=False): cont_attrs['dfuse-attr-time'] = cache_time cont_attrs['dfuse-dentry-time'] = cache_time cont_attrs['dfuse-ndentry-time'] = cache_time - self.dfuse.disable_wb_cache.value = True + dfuse.disable_wb_cache.value = True elif cache_mode == 'metadata': cont_attrs['dfuse-data-cache'] = '1m' cont_attrs['dfuse-attr-time'] = cache_time cont_attrs['dfuse-dentry-time'] = cache_time cont_attrs['dfuse-ndentry-time'] = cache_time - self.dfuse.disable_wb_cache.value = True + dfuse.disable_wb_cache.value = True elif cache_mode == 'data': build_time *= 2 cont_attrs['dfuse-data-cache'] = '1m' cont_attrs['dfuse-attr-time'] = '0' cont_attrs['dfuse-dentry-time'] = '0' cont_attrs['dfuse-ndentry-time'] = '0' - self.dfuse.disable_wb_cache.value = True + dfuse.disable_wb_cache.value = True elif cache_mode == 'nocache': build_time *= 4 cont_attrs['dfuse-data-cache'] = 'off' cont_attrs['dfuse-attr-time'] = '0' cont_attrs['dfuse-dentry-time'] = '0' cont_attrs['dfuse-ndentry-time'] = '0' - self.dfuse.disable_wb_cache.value = True - self.dfuse.disable_caching.value = True + dfuse.disable_wb_cache.value = True + dfuse.disable_caching.value = True else: - self.fail('Invalid cache_mode: {}'.format(cache_mode)) + self.fail(f'Invalid cache_mode: {cache_mode}') - self.container.set_attr(attrs=cont_attrs) + self.log_step('Starting dfuse') + container.set_attr(attrs=cont_attrs) + start_dfuse(self, dfuse, pool, container) - self.start_dfuse(self.hostlist_clients, self.pool, self.container) - - mount_dir = self.dfuse.mount_dir.value + mount_dir = dfuse.mount_dir.value build_dir = os.path.join(mount_dir, 'daos') - remote_env['PATH'] = '{}:$PATH'.format(os.path.join(mount_dir, 'venv', 'bin')) + remote_env['PATH'] = f"{os.path.join(mount_dir, 'venv', 'bin')}:$PATH" remote_env['VIRTUAL_ENV'] = os.path.join(mount_dir, 'venv') remote_env['COVFILE'] = os.environ['COVFILE'] @@ -226,7 +228,7 @@ def run_build_test(self, cache_mode, il_lib=None, run_on_vms=False): remote_env['D_IL_COMPATIBLE'] = '1' remote_env['D_IL_MAX_EQ'] = '0' - envs = ['export {}={}'.format(env, value) for env, value in remote_env.items()] + envs = [f'export {env}={value}' for env, value in remote_env.items()] preload_cmd = ';'.join(envs) @@ -250,6 +252,7 @@ def run_build_test(self, cache_mode, il_lib=None, run_on_vms=False): timeout = 10 * 60 if cmd.startswith('scons'): timeout = build_time * 60 + self.log_step(f'Running \'{cmd}\' with a {timeout}s timeout') start = time.time() result = run_remote( self.log, self.hostlist_clients, command, verbose=True, timeout=timeout) @@ -272,6 +275,8 @@ def run_build_test(self, cache_mode, il_lib=None, run_on_vms=False): run_remote(self.log, self.hostlist_clients, 'cat {}/config.log'.format(build_dir), timeout=30) if il_lib is not None: - self.fail('{} over dfuse with il in mode {}.\n'.format(fail_type, cache_mode)) + self.fail(f'{fail_type} over dfuse with il in mode {cache_mode}') else: - self.fail('{} over dfuse in mode {}.\n'.format(fail_type, cache_mode)) + self.fail(f'{fail_type} over dfuse in mode {cache_mode}') + + self.log.info('Test passed') diff --git a/src/tests/ftest/dfuse/enospace.py b/src/tests/ftest/dfuse/enospace.py index 4f4c1136baa..52f904acf03 100644 --- a/src/tests/ftest/dfuse/enospace.py +++ b/src/tests/ftest/dfuse/enospace.py @@ -7,10 +7,12 @@ import errno import os -from dfuse_test_base import DfuseTestBase +from apricot import TestWithServers +from dfuse_utils import get_dfuse, start_dfuse +from host_utils import get_local_host -class DfuseEnospace(DfuseTestBase): +class DfuseEnospace(TestWithServers): """Dfuse ENOSPC File base class. :avocado: recursive @@ -34,15 +36,22 @@ def test_dfuse_enospace(self): :avocado: tags=daosio,dfuse :avocado: tags=DfuseEnospace,test_dfuse_enospace """ + dfuse_hosts = get_local_host() + # Create a pool, container and start dfuse. - self.add_pool(connect=False) - self.add_container(self.pool) - self.start_dfuse(self.hostlist_clients, self.pool, self.container) + self.log_step('Creating a single pool with a POSIX container') + pool = self.get_pool(connect=False) + container = self.get_container(pool) + + self.log_step('Starting dfuse') + dfuse = get_dfuse(self, dfuse_hosts) + start_dfuse(self, dfuse, pool, container) # create large file and perform write to it so that if goes out of # space. - target_file = os.path.join(self.dfuse.mount_dir.value, "file.txt") + target_file = os.path.join(dfuse.mount_dir.value, 'file.txt') + self.log_step('Write to file until an error occurs') with open(target_file, 'wb', buffering=0) as fd: # Use a write size of 128. On EL 8 this could be 1MiB, however older kernels @@ -75,3 +84,5 @@ def test_dfuse_enospace(self): except OSError as error: if error.errno != errno.ENOSPC: raise + + self.log.info('Test passed') diff --git a/src/tests/ftest/dfuse/fio_pil4dfs_small.py b/src/tests/ftest/dfuse/fio_pil4dfs_small.py index 5e391325ee8..66ec6b2085e 100644 --- a/src/tests/ftest/dfuse/fio_pil4dfs_small.py +++ b/src/tests/ftest/dfuse/fio_pil4dfs_small.py @@ -6,6 +6,7 @@ import os +from dfuse_utils import get_dfuse, start_dfuse from fio_test_base import FioBase @@ -39,4 +40,10 @@ def test_fio_pil4dfs_small(self): :avocado: tags=FioPil4dfsSmall,test_fio_pil4dfs_small """ self.fio_cmd.env['LD_PRELOAD'] = os.path.join(self.prefix, 'lib64', 'libpil4dfs.so') + pool = self.get_pool(connect=False) + container = self.get_container(pool) + container.set_attr(attrs={'dfuse-direct-io-disable': 'on'}) + dfuse = get_dfuse(self, self.hostlist_clients) + start_dfuse(self, dfuse, pool, container) + self.fio_cmd.update_directory(dfuse.mount_dir.value) self.execute_fio() diff --git a/src/tests/ftest/dfuse/fio_small.py b/src/tests/ftest/dfuse/fio_small.py index 69aee6b2507..fea62668f0e 100644 --- a/src/tests/ftest/dfuse/fio_small.py +++ b/src/tests/ftest/dfuse/fio_small.py @@ -4,6 +4,7 @@ SPDX-License-Identifier: BSD-2-Clause-Patent """ +from dfuse_utils import get_dfuse, start_dfuse from fio_test_base import FioBase @@ -37,4 +38,10 @@ def test_fio_small(self): :avocado: tags=dfuse,fio,checksum,tx :avocado: tags=FioSmall,test_fio_small """ + pool = self.get_pool(connect=False) + container = self.get_container(pool) + container.set_attr(attrs={'dfuse-direct-io-disable': 'on'}) + dfuse = get_dfuse(self, self.hostlist_clients) + start_dfuse(self, dfuse, pool, container) + self.fio_cmd.update_directory(dfuse.mount_dir.value) self.execute_fio() diff --git a/src/tests/ftest/dfuse/mu_mount.py b/src/tests/ftest/dfuse/mu_mount.py index 8f2369583d1..32ace91abf7 100644 --- a/src/tests/ftest/dfuse/mu_mount.py +++ b/src/tests/ftest/dfuse/mu_mount.py @@ -6,12 +6,13 @@ import os from getpass import getuser +from apricot import TestWithServers from ClusterShell.NodeSet import NodeSet -from dfuse_test_base import DfuseTestBase +from dfuse_utils import get_dfuse, start_dfuse from run_utils import command_as_user, run_remote -class DfuseMUMount(DfuseTestBase): +class DfuseMUMount(TestWithServers): """Verifies multi-user dfuse mounting :avocado: recursive @@ -35,18 +36,18 @@ def test_dfuse_mu_mount_basic(self): :avocado: tags=dfuse,dfuse_mu,daos_cmd :avocado: tags=DfuseMUMount,test_dfuse_mu_mount_basic """ - self.log.info('Creating a pool and container for dfuse') + self.log_step('Creating a pool and container for dfuse') pool = self.get_pool(connect=False) cont = self.get_container(pool, label='root_cont') # Setup dfuse - self.load_dfuse(self.hostlist_clients) - self.dfuse.update_params(pool=pool.identifier, cont=cont.label.value) - root_dir = self.dfuse.mount_dir.value + dfuse = get_dfuse(self, self.hostlist_clients) + dfuse.update_params(pool=pool.identifier, cont=cont.label.value) + root_dir = dfuse.mount_dir.value # Use a different log file for each user - root_log_file = self.dfuse.env["D_LOG_FILE"] + ".root" - dfuse_user_log_file = self.dfuse.env["D_LOG_FILE"] + "." + getuser() + root_log_file = dfuse.env["D_LOG_FILE"] + ".root" + dfuse_user_log_file = dfuse.env["D_LOG_FILE"] + "." + getuser() # For verifying expected permission failure def _check_fail(result): @@ -54,53 +55,54 @@ def _check_fail(result): if 'DER_NO_PERM' not in stdout: self.fail('Expected mount as root to fail without ACLs in single-user mode') - self.log.info('Verify root cannot mount the container without ACLs in single-user mode') - self.dfuse.update_params(multi_user=False) - self.dfuse.run_user = 'root' - self.dfuse.env["D_LOG_FILE"] = root_log_file - self.dfuse.run(check=False, mount_callback=_check_fail) - self.dfuse.stop() - - self.log.info('Verify root cannot mount the container without ACLs in multi-user mode') - self.dfuse.update_params(multi_user=True) - self.dfuse.run_user = 'root' - self.dfuse.env["D_LOG_FILE"] = root_log_file - self.dfuse.run(check=False, mount_callback=_check_fail) - self.dfuse.stop() - - self.log.info('Mounting dfuse in single-user mode') - self.dfuse.update_params(multi_user=False) - self.dfuse.run_user = None # Current user - self.dfuse.env["D_LOG_FILE"] = dfuse_user_log_file - self.dfuse.run() - - self.log.info('Verify stat as dfuse user in single-user mode succeeds') + self.log_step('Verify root cannot mount the container without ACLs in single-user mode') + dfuse.update_params(multi_user=False) + dfuse.run_user = 'root' + dfuse.env["D_LOG_FILE"] = root_log_file + dfuse.run(check=False, mount_callback=_check_fail) + dfuse.stop() + + self.log_step('Verify root cannot mount the container without ACLs in multi-user mode') + dfuse.update_params(multi_user=True) + dfuse.run_user = 'root' + dfuse.env["D_LOG_FILE"] = root_log_file + dfuse.run(check=False, mount_callback=_check_fail) + dfuse.stop() + + self.log_step('Mounting dfuse in single-user mode') + dfuse.update_params(multi_user=False) + dfuse.run_user = None # Current user + dfuse.env["D_LOG_FILE"] = dfuse_user_log_file + dfuse.run() + + self.log_step('Verify stat as dfuse user in single-user mode succeeds') command = 'stat {}'.format(root_dir) if not run_remote(self.log, self.hostlist_clients, command).passed: self.fail('Failed to stat in single-user mode') - self.log.info('Verify stat as root user in single-user mode fails') + self.log_step('Verify stat as root user in single-user mode fails') command = command_as_user('stat {}'.format(root_dir), 'root') if run_remote(self.log, self.hostlist_clients, command).passed: self.fail('Expected stat to fail as root in single-user mode') - self.log.info('Re-mounting dfuse in multi-user mode') - self.dfuse.stop() - self.dfuse.update_params(multi_user=True) - self.dfuse.run() + self.log_step('Re-mounting dfuse in multi-user mode') + dfuse.stop() + dfuse.update_params(multi_user=True) + dfuse.run() - self.log.info('Verify stat as dfuse user in multi-user mode succeeds') + self.log_step('Verify stat as dfuse user in multi-user mode succeeds') command = 'stat {}'.format(root_dir) if not run_remote(self.log, self.hostlist_clients, command).passed: self.fail('Failed to stat in multi-user mode') - self.log.info('Verify stat as root user in multi-user mode succeeds') + self.log_step('Verify stat as root user in multi-user mode succeeds') command = command_as_user('stat {}'.format(root_dir), 'root') if not run_remote(self.log, self.hostlist_clients, command).passed: self.fail('Failed to stat as root in multi-user mode') # Cleanup leftover dfuse - self.dfuse.stop() + self.log_step('Cleanup leftover dfuse') + dfuse.stop() # Give root permission to read the pool pool.update_acl(False, entry="A::root@:r") @@ -108,19 +110,19 @@ def _check_fail(result): # Give root permission to read the container and access properties cont.update_acl(entry="A::root@:rt") - self.log.info('Verify root can mount the container with ACLs in single-user mode') - self.dfuse.update_params(multi_user=False) - self.dfuse.run_user = 'root' - self.dfuse.env["D_LOG_FILE"] = root_log_file - self.dfuse.run() - self.dfuse.stop() + self.log_step('Verify root can mount the container with ACLs in single-user mode') + dfuse.update_params(multi_user=False) + dfuse.run_user = 'root' + dfuse.env["D_LOG_FILE"] = root_log_file + dfuse.run() + dfuse.stop() - self.log.info('Verify root can mount the container with ACLs in muli-user mode') - self.dfuse.update_params(multi_user=True) - self.dfuse.run_user = 'root' - self.dfuse.env["D_LOG_FILE"] = dfuse_user_log_file - self.dfuse.run() - self.dfuse.stop() + self.log_step('Verify root can mount the container with ACLs in multi-user mode') + dfuse.update_params(multi_user=True) + dfuse.run_user = 'root' + dfuse.env["D_LOG_FILE"] = dfuse_user_log_file + dfuse.run() + dfuse.stop() def test_dfuse_mu_mount_uns(self): """JIRA ID: DAOS-10859. @@ -139,20 +141,19 @@ def test_dfuse_mu_mount_uns(self): """ dfuse_user = getuser() - self.log.info('Creating a pool and container for dfuse') + self.log_step('Creating a pool and container for dfuse') pool1 = self.get_pool(connect=False) cont1 = self.get_container(pool1, label='root_cont') - self.log.info('Starting dfuse with the current user in multi-user mode') - self.load_dfuse(self.hostlist_clients) - self.dfuse.update_params(pool=pool1.identifier, cont=cont1.label.value, multi_user=True) - root_dir = self.dfuse.mount_dir.value - self.dfuse.run() + self.log_step('Starting dfuse with the current user in multi-user mode') + dfuse = get_dfuse(self, self.hostlist_clients) + start_dfuse(self, dfuse, pool1, cont1, multi_user=True) + root_dir = dfuse.mount_dir.value - self.log.info('Creating a second pool') + self.log_step('Creating a second pool') pool2 = self.get_pool(connect=False) - self.log.info('Giving root permission to create containers in both pools') + self.log_step('Giving root permission to create containers in both pools') pool1.update_acl(False, entry="A::root@:rw") pool2.update_acl(False, entry="A::root@:rw") @@ -171,7 +172,7 @@ def _verify_uns(pool_label, cont_label): if not run_remote(self.log, first_client, command).passed: self.fail('Failed to create sub-container as root in multi-user mode') - self.log.info('Verify dfuse user was automatically given ACLs for the new container') + self.log_step('Verify dfuse user was automatically given ACLs for the new container') expected_acl = 'A::{}@:rwt'.format(dfuse_user) self.log.info('Expected ACL: %s', expected_acl) command = command_as_user( @@ -183,22 +184,22 @@ def _verify_uns(pool_label, cont_label): if expected_acl not in stdout: self.fail('Expected ACL for container created by root: {}'.format(expected_acl)) - self.log.info('Verify dfuse user can access the container created by root') + self.log_step('Verify dfuse user can access the container created by root') command = '{} container get-prop --path {}'.format(daos_path, cont_path) if not run_remote(self.log, first_client, command).passed: self.fail('Failed to get sub-container properties in multi-user mode') - self.log.info('Verify dfuse user can read the container created by root') + self.log_step('Verify dfuse user can read the container created by root') command = 'ls -l {}'.format(cont_path) if not run_remote(self.log, first_client, command).passed: self.fail('Failed to read container created by root, as dfuse user') - self.log.info('Verify root can read its own container') + self.log_step('Verify root can read its own container') command = command_as_user('ls -l {}'.format(cont_path), 'root') if not run_remote(self.log, first_client, command).passed: self.fail('Failed to read container created by root, as root') - self.log.info('Revoking ACLs for just dfuse user on the container created by root') + self.log_step('Revoking ACLs for just dfuse user on the container created by root') principal = 'u:{}@'.format(dfuse_user) command = command_as_user( '{} container delete-acl --path {} --principal {}'.format( @@ -207,22 +208,22 @@ def _verify_uns(pool_label, cont_label): if not run_remote(self.log, first_client, command).passed: self.fail('Failed to revoke ACLs on container created by root') - self.log.info('Restarting dfuse to pick up ACL changes') - self.dfuse.stop() - self.dfuse.run() + self.log_step('Restarting dfuse to pick up ACL changes') + dfuse.stop() + dfuse.run() - self.log.info('Verifying dfuse user can no longer read the container through dfuse') + self.log_step('Verifying dfuse user can no longer read the container through dfuse') command = 'ls -l {}'.format(cont_path) if run_remote(self.log, first_client, command).passed: self.fail('Expected ls to fail on container created by root, as dfuse user') - self.log.info('Verifying root can no longer read the container through dfuse') + self.log_step('Verifying root can no longer read the container through dfuse') command = command_as_user('ls -l {}'.format(cont_path), 'root') if run_remote(self.log, first_client, command).passed: self.fail('Expected ls to fail on container created by root, as root') - self.log.info('Verify UNS sub-container create as root - in dfuse pool') + self.log_step('Verify UNS sub-container create as root - in dfuse pool') _verify_uns(pool_label="", cont_label='pool1_root_cont') - self.log.info('Verify UNS sub-container create as root - in different pool') + self.log_step('Verify UNS sub-container create as root - in different pool') _verify_uns(pool_label=pool2.identifier, cont_label='pool2_root_cont') diff --git a/src/tests/ftest/dfuse/mu_perms.py b/src/tests/ftest/dfuse/mu_perms.py index ffe1c91e8b9..a6fc558f157 100644 --- a/src/tests/ftest/dfuse/mu_perms.py +++ b/src/tests/ftest/dfuse/mu_perms.py @@ -71,12 +71,12 @@ def test_dfuse_mu_perms(self): # Run dfuse as dfuse_user dfuse = get_dfuse(self, client) dfuse.run_user = dfuse_user - start_dfuse(self, dfuse, pool=pool, container=cont) + start_dfuse(self, dfuse, pool, cont) # Verify each permission mode and entry type for _mode, _type in product(('simple', 'real'), ('file', 'dir')): path = os.path.join(dfuse.mount_dir.value, 'test_' + _type) - self.log.info('Verifying %s %s permissions on %s', _mode, _type, path) + self.log_step(f'Verifying {_mode} {_type} permissions on {path}') verify_perms_cmd.update_params(path=path, create_type=_type, verify_mode=_mode) verify_perms_cmd.run() self.log.info('Passed %s %s permissions on %s', _mode, _type, path) @@ -93,7 +93,7 @@ def test_dfuse_mu_perms(self): # Verify real permissions for _type in ('file', 'dir'): path = os.path.join(sub_dir, 'test_' + _type) - self.log.info('Verifying real %s permissions on %s', _type, path) + self.log_step(f'Verifying real {_type} permissions on {path}') verify_perms_cmd.update_params(path=path, create_type=_type, verify_mode='real') verify_perms_cmd.run() self.log.info('Passed real %s permissions on %s', _type, path) @@ -111,13 +111,12 @@ def test_dfuse_mu_perms(self): # Verify real permissions for _type in ('file', 'dir'): path = os.path.join(sub_dir, 'test_' + _type) - self.log.info('Verifying real %s permissions on %s', _type, path) + self.log_step(f'Verifying real {_type} permissions on {path}') verify_perms_cmd.update_params(path=path, create_type=_type, verify_mode='real') verify_perms_cmd.run() self.log.info('Passed real %s permissions on %s', _type, path) - # Stop dfuse instances. Needed until containers are cleaned up with with register_cleanup - dfuse.stop() + self.log.info('Test passed') def _create_dir_and_chown(self, client, path, create_as, owner, group=None): """Create a directory and give some user and group ownership. @@ -130,21 +129,21 @@ def _create_dir_and_chown(self, client, path, create_as, owner, group=None): group (str): group to give ownership to """ - self.log.info('Creating directory: %s', path) + self.log_step('Creating directory: %s', path) command = command_as_user('mkdir ' + path, create_as) if not run_remote(self.log, client, command).passed: - self.fail('Failed to create directory: {}'.format(path)) + self.fail(f'Failed to create directory: {path}') if group: - self.log.info('Giving ownership to %s:%s', owner, group) + self.log_step(f'Giving ownership to {owner}:{group}') else: - self.log.info('Giving ownership to %s', owner) + self.log_step(f'Giving ownership to {owner}') command = command_as_user(get_chown_command(user=owner, group=group, file=path), 'root') if not run_remote(self.log, client, command).passed: - self.fail('Failed to give ownership to {}'.format(owner)) - command = command_as_user('stat {}'.format(path), owner) + self.fail(f'Failed to give ownership to {owner}') + command = command_as_user(f'stat {path}', owner) if not run_remote(self.log, client, command).passed: - self.fail('Failed to stat {}'.format(path)) + self.fail(f'Failed to stat {path}') def test_dfuse_mu_perms_cache(self): """Jira ID: DAOS-10858. @@ -248,9 +247,7 @@ def _wait_for_cache_expiration(): path=dfuse2_entry_path, verify_mode='real', perms='000', no_chmod=True) verify_perms_cmd.run() - # Stop dfuse instances. Needed until containers are cleaned up with with register_cleanup - dfuse1.stop() - dfuse2.stop() + self.log.info('Test passed') def run_test_il(self, il_lib=None): """Jira ID: DAOS-10857. @@ -350,7 +347,7 @@ def _verify(use_il, expected_il_messages, expect_der_no_perm): dfuse_entry_path = os.path.join(dfuse.mount_dir.value, entry_type) create_cmd = 'touch' if entry_type == 'file' else 'mkdir' - self.log.info('Creating a test %s in dfuse', entry_type) + self.log_step('Creating a test %s in dfuse', entry_type) command = command_as_user('{} "{}"'.format(create_cmd, dfuse_entry_path), dfuse_user) if not run_remote(self.log, self.hostlist_clients, command).passed: self.fail('Failed to create test {}'.format(entry_type)) @@ -359,7 +356,7 @@ def _verify(use_il, expected_il_messages, expect_der_no_perm): # Revoke POSIX permissions posix_perms = {'file': '600', 'dir': '600'}[entry_type] - self.log.info('Setting %s POSIX permissions to %s', entry_type, posix_perms) + self.log_step(f'Setting {entry_type} POSIX permissions to {posix_perms}') command = command_as_user( 'chmod {} "{}"'.format(posix_perms, dfuse_entry_path), dfuse_user) if not run_remote(self.log, self.hostlist_clients, command).passed: @@ -368,14 +365,14 @@ def _verify(use_il, expected_il_messages, expect_der_no_perm): # Without pool/container ACLs, access is based on POSIX perms, # which the user also doesn't have verify_perms_cmd.update_params(perms=posix_perms) - self.log.info('Verify - no perms - not using IL') + self.log_step('Verify - no perms - not using IL') _verify(use_il=False, expected_il_messages=0, expect_der_no_perm=False) - self.log.info('Verify - no perms - using IL') + self.log_step('Verify - no perms - using IL') _verify(use_il=True, expected_il_messages=2, expect_der_no_perm=False) # Give the user POSIX perms posix_perms = {'file': '606', 'dir': '505'}[entry_type] - self.log.info('Setting %s POSIX permissions to %s', entry_type, posix_perms) + self.log_step(f'Setting {entry_type} POSIX permissions to {posix_perms}') command = command_as_user( 'chmod {} "{}"'.format(posix_perms, dfuse_entry_path), dfuse_user) if not run_remote(self.log, self.hostlist_clients, command).passed: @@ -383,26 +380,26 @@ def _verify(use_il, expected_il_messages, expect_der_no_perm): # With POSIX perms only, access is based on POSIX perms whether using IL or not verify_perms_cmd.update_params(perms=posix_perms) - self.log.info('Verify - POSIX perms only - not using IL') + self.log_step('Verify - POSIX perms only - not using IL') _verify(use_il=False, expected_il_messages=0, expect_der_no_perm=False) - self.log.info('Verify - POSIX perms only - using IL') + self.log_step('Verify - POSIX perms only - using IL') _verify(use_il=True, expected_il_messages=2, expect_der_no_perm=True) # Give the user pool/container ACL perms - self.log.info('Giving %s pool "r" ACL permissions', other_user) + self.log_step('Giving %s pool "r" ACL permissions', other_user) pool.update_acl(use_acl=False, entry="A::{}@:r".format(other_user)) - self.log.info('Giving %s container "rwt" ACL permissions', other_user) + self.log_step('Giving %s container "rwt" ACL permissions', other_user) cont.update_acl(entry="A::{}@:rwt".format(other_user)) # With POSIX perms and ACLs, open is based on POSIX, but IO is based on ACLs - self.log.info('Verify - POSIX and ACL perms - not using IL') + self.log_step('Verify - POSIX and ACL perms - not using IL') _verify(use_il=False, expected_il_messages=0, expect_der_no_perm=False) - self.log.info('Verify - POSIX and ACL perms - using IL') + self.log_step('Verify - POSIX and ACL perms - using IL') _verify(use_il=True, expected_il_messages=4, expect_der_no_perm=False) # Revoke POSIX permissions posix_perms = {'file': '600', 'dir': '00'}[entry_type] - self.log.info('Setting %s POSIX permissions to %s', entry_type, posix_perms) + self.log_step(f'Setting {entry_type} POSIX permissions to {posix_perms}') command = command_as_user( 'chmod {} "{}"'.format(posix_perms, dfuse_entry_path), dfuse_user) if not run_remote(self.log, self.hostlist_clients, command).passed: @@ -410,14 +407,11 @@ def _verify(use_il, expected_il_messages, expect_der_no_perm): # Without POSIX permissions, pool/container ACLs don't matter since open requires POSIX verify_perms_cmd.update_params(perms=posix_perms) - self.log.info('Verify - ACLs only - not using IL') + self.log_step('Verify - ACLs only - not using IL') _verify(use_il=False, expected_il_messages=0, expect_der_no_perm=False) - self.log.info('Verify - ACLs only - using IL') + self.log_step('Verify - ACLs only - using IL') _verify(use_il=True, expected_il_messages=2, expect_der_no_perm=False) - # Stop dfuse instances. Needed until containers are cleaned up with with register_cleanup - dfuse.stop() - def test_dfuse_mu_perms_ioil(self): """ :avocado: tags=all,daily_regression @@ -426,6 +420,7 @@ def test_dfuse_mu_perms_ioil(self): :avocado: tags=DfuseMUPerms,test_dfuse_mu_perms_ioil """ self.run_test_il(il_lib='libioil.so') + self.log.info('Test passed') def test_dfuse_mu_perms_pil4dfs(self): """ @@ -435,3 +430,4 @@ def test_dfuse_mu_perms_pil4dfs(self): :avocado: tags=DfuseMUPerms,test_dfuse_mu_perms_pil4dfs """ self.run_test_il(il_lib='libpil4dfs.so') + self.log.info('Test passed') diff --git a/src/tests/ftest/dfuse/pil4dfs_dcache.py b/src/tests/ftest/dfuse/pil4dfs_dcache.py index 8cb18de389c..683b95a0066 100644 --- a/src/tests/ftest/dfuse/pil4dfs_dcache.py +++ b/src/tests/ftest/dfuse/pil4dfs_dcache.py @@ -7,12 +7,12 @@ import os import re +from apricot import TestWithServers from ClusterShell.NodeSet import NodeSet -from dfuse_test_base import DfuseTestBase -from dfuse_utils import Pil4dfsDcacheCmd +from dfuse_utils import Pil4dfsDcacheCmd, get_dfuse, start_dfuse -class Pil4dfsDcache(DfuseTestBase): +class Pil4dfsDcache(TestWithServers): """Test class Description: Runs set of unit test on pil4dfs directory cache. :avocado: recursive @@ -357,15 +357,21 @@ def setUp(self): super().setUp() def _mount_dfuse(self): - """Mount a DFuse mount point.""" + """Mount a DFuse mount point. + + Returns: + Dfuse: a Dfuse object + """ self.log.info("Creating DAOS pool") - self.add_pool() + pool = self.get_pool() self.log.info("Creating DAOS container") - self.add_container(self.pool) + container = self.get_container(pool) self.log.info("Mounting DFuse mount point") - self.start_dfuse(self.hostlist_clients, self.pool, self.container) + dfuse = get_dfuse(self, self.hostlist_clients) + start_dfuse(self, dfuse, pool, container) + return dfuse def _update_cmd_env(self, env, mnt, **kwargs): """Update the Pil4dfsDcacheCmd command environment. @@ -446,12 +452,12 @@ def test_pil4dfs_dcache_enabled(self): :avocado: tags=Pil4dfsDcache,test_pil4dfs_dcache_enabled """ self.log_step("Mount a DFuse mount point") - self._mount_dfuse() + dfuse = self._mount_dfuse() self.log.info("Running pil4dfs_dcache command") hostname = self.hostlist_clients[0] host = NodeSet(hostname) - mnt = self.dfuse.mount_dir.value + mnt = dfuse.mount_dir.value cmd = Pil4dfsDcacheCmd(host, self.prefix) self._update_cmd_env(cmd.env, mnt) @@ -480,12 +486,12 @@ def test_pil4dfs_dcache_disabled(self): :avocado: tags=Pil4dfsDcache,test_pil4dfs_dcache_disabled """ self.log_step("Mount a DFuse mount point") - self._mount_dfuse() + dfuse = self._mount_dfuse() self.log.info("Running pil4dfs_dcache command") hostname = self.hostlist_clients[0] host = NodeSet(hostname) - mnt = self.dfuse.mount_dir.value + mnt = dfuse.mount_dir.value cmd = Pil4dfsDcacheCmd(host, self.prefix) env_kwargs = {"D_IL_DCACHE_REC_TIMEOUT": 0} self._update_cmd_env(cmd.env, mnt, **env_kwargs) @@ -515,12 +521,12 @@ def test_pil4dfs_dcache_gc_disabled(self): :avocado: tags=Pil4dfsDcache,test_pil4dfs_dcache_gc_disabled """ self.log_step("Mount a DFuse mount point") - self._mount_dfuse() + dfuse = self._mount_dfuse() self.log_step("Run pil4dfs_dcache command") hostname = self.hostlist_clients[0] host = NodeSet(hostname) - mnt = self.dfuse.mount_dir.value + mnt = dfuse.mount_dir.value cmd = Pil4dfsDcacheCmd(host, self.prefix) env_kwargs = {"D_IL_DCACHE_GC_PERIOD": 0} self._update_cmd_env(cmd.env, mnt, **env_kwargs) diff --git a/src/tests/ftest/dfuse/pil4dfs_fio.py b/src/tests/ftest/dfuse/pil4dfs_fio.py index 49121e38237..c33e9be92dc 100644 --- a/src/tests/ftest/dfuse/pil4dfs_fio.py +++ b/src/tests/ftest/dfuse/pil4dfs_fio.py @@ -7,14 +7,15 @@ import json import os +from apricot import TestWithServers from ClusterShell.NodeSet import NodeSet from cpu_utils import CpuInfo -from dfuse_test_base import DfuseTestBase +from dfuse_utils import get_dfuse, start_dfuse from fio_utils import FioCommand from general_utils import bytes_to_human -class Pil4dfsFio(DfuseTestBase): +class Pil4dfsFio(TestWithServers): """Test class Description: Runs Fio with in small config. :avocado: recursive @@ -26,7 +27,6 @@ def __init__(self, *args, **kwargs): """Initialize a FioPil4dfs object.""" super().__init__(*args, **kwargs) - self.fio_cmd = None self.fio_params = {"thread": "", "blocksize": "", "size": ""} self.fio_numjobs = 0 self.fio_cpus_allowed = "" @@ -57,24 +57,10 @@ def setUp(self): def _create_container(self): """Created a DAOS POSIX container""" self.log.info("Creating pool") - self.assertIsNone(self.pool, "Unexpected pool before starting test") - self.add_pool() + pool = self.get_pool() self.log.info("Creating container") - self.assertIsNone(self.container, "Unexpected container before starting test") - self.add_container(self.pool) - - def _destroy_container(self): - """Destroy DAOS POSIX container previously created""" - if self.container is not None: - self.log.debug("Destroying container %s", str(self.container)) - self.destroy_containers(self.container) - self.container = None - - if self.pool is not None: - self.log.debug("Destroying pool %s", str(self.pool)) - self.destroy_pools(self.pool) - self.pool = None + return self.get_container(pool) def _get_bandwidth(self, fio_result, rw): """Returns FIO bandwidth of a given I/O pattern @@ -101,42 +87,40 @@ def _run_fio_dfuse(self): dict: Read and Write bandwidths of the FIO command. """ - self._create_container() - self.log.info("Mounting DFuse mount point") - self.start_dfuse(self.hostlist_clients, self.pool, self.container) - self.log.debug("Mounted DFuse mount point %s", str(self.dfuse)) - - self.fio_cmd = FioCommand() - self.fio_cmd.get_params(self) - self.fio_cmd.update( - "global", "directory", self.dfuse.mount_dir.value, - f"fio --name=global --directory={self.dfuse.mount_dir.value}") - self.fio_cmd.update("global", "ioengine", "psync", "fio --name=global --ioengine='psync'") - self.fio_cmd.update( + container = self._create_container() + + self.log_step("Mounting DFuse mount point") + dfuse = get_dfuse(self, self.hostlist_clients) + start_dfuse(self, dfuse, container.pool, container) + self.log.debug("Mounted DFuse mount point %s", str(dfuse)) + + fio_cmd = FioCommand() + fio_cmd.get_params(self) + fio_cmd.update_directory(dfuse.mount_dir.value) + fio_cmd.update("global", "ioengine", "psync", "fio --name=global --ioengine='psync'") + fio_cmd.update( "global", "numjobs", self.fio_numjobs, f"fio --name=global --numjobs={self.fio_numjobs}") - self.fio_cmd.update( + fio_cmd.update( "global", "cpus_allowed", self.fio_cpus_allowed, f"fio --name=global --cpus_allowed={self.fio_cpus_allowed}") - self.fio_cmd.env['LD_PRELOAD'] = os.path.join(self.prefix, 'lib64', 'libpil4dfs.so') - self.fio_cmd.hosts = self.hostlist_clients + fio_cmd.env['LD_PRELOAD'] = os.path.join(self.prefix, 'lib64', 'libpil4dfs.so') + fio_cmd.hosts = self.hostlist_clients bws = {} for rw in Pil4dfsFio._FIO_RW_NAMES: - self.fio_cmd.update("job", "rw", rw, f"fio --name=job --rw={rw}") + fio_cmd.update("job", "rw", rw, f"fio --name=job --rw={rw}") params = ", ".join(f"{name}={value}" for name, value in self.fio_params.items()) self.log.info("Running FIO command: rw=%s, %s", rw, params) - self.log.debug( - "FIO command: LD_PRELOAD=%s %s", self.fio_cmd.env['LD_PRELOAD'], str(self.fio_cmd)) - result = self.fio_cmd.run() + self.log.debug("FIO command: LD_PRELOAD=%s %s", fio_cmd.env['LD_PRELOAD'], str(fio_cmd)) + result = fio_cmd.run() bws[rw] = self._get_bandwidth(result, rw) self.log.debug("DFuse bandwidths for %s: %s", rw, bws[rw]) - if self.dfuse is not None: - self.log.debug("Stopping DFuse mount point %s", str(self.dfuse)) - self.stop_dfuse() - self._destroy_container() + dfuse.stop() + container.destroy() + container.pool.destroy() return bws @@ -147,38 +131,36 @@ def _run_fio_dfs(self): dict: Read and Write bandwidths of the FIO command. """ - self._create_container() + container = self._create_container() - self.fio_cmd = FioCommand() - self.fio_cmd.get_params(self) - self.fio_cmd.update("global", "ioengine", "dfs", "fio --name=global --ioengine='dfs'") - self.fio_cmd.update( + fio_cmd = FioCommand() + fio_cmd.get_params(self) + fio_cmd.update("global", "ioengine", "dfs", "fio --name=global --ioengine='dfs'") + fio_cmd.update( "global", "numjobs", self.fio_numjobs, f"fio --name=global --numjobs={self.fio_numjobs}") - self.fio_cmd.update( + fio_cmd.update( "global", "cpus_allowed", self.fio_cpus_allowed, f"fio --name=global --cpus_allowed={self.fio_cpus_allowed}") # NOTE DFS ioengine options must come after the ioengine that defines them is selected. - self.fio_cmd.update( - "job", "pool", self.pool.uuid, - f"fio --name=job --pool={self.pool.uuid}") - self.fio_cmd.update( - "job", "cont", self.container.uuid, - f"fio --name=job --cont={self.container.uuid}") - self.fio_cmd.hosts = self.hostlist_clients + fio_cmd.update( + "job", "pool", container.pool.uuid, f"fio --name=job --pool={container.pool.uuid}") + fio_cmd.update("job", "cont", container.uuid, f"fio --name=job --cont={container.uuid}") + fio_cmd.hosts = self.hostlist_clients bws = {} for rw in Pil4dfsFio._FIO_RW_NAMES: - self.fio_cmd.update("job", "rw", rw, f"fio --name=job --rw={rw}") + fio_cmd.update("job", "rw", rw, f"fio --name=job --rw={rw}") params = ", ".join(f"{name}={value}" for name, value in self.fio_params.items()) self.log.info("Running FIO command: rw=%s, %s", rw, params) - self.log.debug("FIO command: %s", str(self.fio_cmd)) - result = self.fio_cmd.run() + self.log.debug("FIO command: %s", str(fio_cmd)) + result = fio_cmd.run() bws[rw] = self._get_bandwidth(result, rw) self.log.debug("DFS bandwidths for %s: %s", rw, bws[rw]) - self._destroy_container() + container.destroy() + container.pool.destroy() return bws diff --git a/src/tests/ftest/dfuse/read.py b/src/tests/ftest/dfuse/read.py index b01b6655d19..607b8f99f9f 100644 --- a/src/tests/ftest/dfuse/read.py +++ b/src/tests/ftest/dfuse/read.py @@ -5,11 +5,12 @@ import time -from dfuse_test_base import DfuseTestBase +from apricot import TestWithServers +from dfuse_utils import get_dfuse, start_dfuse from run_utils import run_remote -class DFusePreReadTest(DfuseTestBase): +class DFusePreReadTest(TestWithServers): """Base ReadTest test class. :avocado: recursive """ @@ -31,7 +32,7 @@ def test_dfuse_pre_read(self): pool = self.get_pool(connect=False) container = self.get_container(pool) - self.load_dfuse(self.hostlist_clients, None) + dfuse = get_dfuse(self, self.hostlist_clients) cont_attrs = {} @@ -42,9 +43,9 @@ def test_dfuse_pre_read(self): container.set_attr(attrs=cont_attrs) - self.start_dfuse(self.hostlist_clients, pool, container) + start_dfuse(self, dfuse, pool, container) - fuse_root_dir = self.dfuse.mount_dir.value + fuse_root_dir = dfuse.mount_dir.value # make a directory to run the test from. Pre-read is based on previous access to a # directory so this needs to be evicted after the write and before the test so the @@ -77,7 +78,7 @@ def test_dfuse_pre_read(self): time.sleep(1) # Sample the stats, later on we'll check this. - data = self.dfuse.get_stats() + data = dfuse.get_stats() # Check that the inode has been evicted, and there's been no reads so far. self.assertEqual(data["inodes"], 1, "Incorrect number of active nodes") @@ -92,7 +93,7 @@ def test_dfuse_pre_read(self): if not result.passed: self.fail(f'"{cmd}" failed on {result.failed_hosts}') - data = self.dfuse.get_stats() + data = dfuse.get_stats() # pre_read requests are a subset of reads so for this test we should verify that they are # equal, and non-zero. @@ -111,7 +112,7 @@ def test_dfuse_pre_read(self): if not result.passed: self.fail(f'"{cmd}" failed on {result.failed_hosts}') - data = self.dfuse.get_stats() + data = dfuse.get_stats() # pre_read requests are a subset of reads so for this test we should verify that they are # equal, and non-zero. diff --git a/src/tests/ftest/dfuse/root_container.py b/src/tests/ftest/dfuse/root_container.py index efce567ea01..8daed813027 100644 --- a/src/tests/ftest/dfuse/root_container.py +++ b/src/tests/ftest/dfuse/root_container.py @@ -1,61 +1,21 @@ """ - (C) Copyright 2020-2023 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent """ -from dfuse_test_base import DfuseTestBase +import os + +from apricot import TestWithServers +from dfuse_utils import get_dfuse, start_dfuse from run_utils import run_remote -class RootContainerTest(DfuseTestBase): +class RootContainerTest(TestWithServers): """Base Dfuse Container check test class. :avocado: recursive """ - def __init__(self, *args, **kwargs): - """Initialize a RootContainerTest object.""" - super().__init__(*args, **kwargs) - self.pool = [] - self.container = [] - self.tmp_file_count = self.params.get("tmp_file_count", '/run/container/*') - self.cont_count = self.params.get("cont_count", '/run/container/*') - self.tmp_file_size = self.params.get("tmp_file_size", '/run/container/*') - self.tmp_file_name = self.params.get("tmp_file_name", '/run/container/*') - # device where the pools and containers are created - self.device = "scm" - - def setUp(self): - """Set up each test case.""" - # Start the servers and agents - super().setUp() - self.dfuse_hosts = None - - def _create_pool(self): - """Add a new TestPool object to the list of pools. - - Returns: - TestPool: the newly added pool - - """ - self.pool.append(self.get_pool(connect=False)) - return self.pool[-1] - - def _create_cont(self, pool, **params): - """Add a new TestContainer object to the list of containers. - - Args: - pool (TestPool): pool object - params (dict, optional): name/value of container attributes to update - - Returns: - TestContainer: the newly added container - - """ - container = self.get_container(pool, **params) - self.container.append(container) - return container - def test_dfuse_root_container(self): """Jira ID: DAOS-3782. @@ -75,40 +35,79 @@ def test_dfuse_root_container(self): :avocado: tags=container,dfuse :avocado: tags=RootContainerTest,test_dfuse_root_container """ - # Create a pool and start dfuse. - pool = self._create_pool() - container = self._create_cont(pool) - self.dfuse_hosts = self.agent_managers[0].hosts - # mount fuse - self.start_dfuse(self.dfuse_hosts, pool, container) - # Create another container and add it as sub container under - # root container - sub_container = str(self.dfuse.mount_dir.value + "/cont0") - container = self._create_cont(pool, path=sub_container) + cont_count = self.params.get("cont_count", '/run/container/*') + pool_count = self.params.get("pool_count", "/run/pool/*") + tmp_file_name = self.params.get("tmp_file_name", '/run/container/*') + tmp_file_count = self.params.get("tmp_file_count", '/run/container/*') + tmp_file_size = self.params.get("tmp_file_size", '/run/container/*') + device = "scm" + + # Create a pool and container. + self.log_step("Create a pool and a root container") + pool = self.get_pool(connect=False) + container = self.get_container(pool) + + # Start and mount fuse + dfuse_hosts = self.agent_managers[0].hosts + self.log_step("Start and mount dfuse using the root container") + dfuse = get_dfuse(self, dfuse_hosts) + start_dfuse(self, dfuse, pool, container) + + # Create another container and add it as sub container under the root container + self.log_step("Add another container as a sub container under the root container") + sub_container = str(dfuse.mount_dir.value + "/cont0") + self.get_container(pool, path=sub_container) + # Insert files into root container - self.insert_files_and_verify("") + self.log_step("Insert files into the root container") + self.insert_files_and_verify( + dfuse_hosts, dfuse.mount_dir.value, tmp_file_count, tmp_file_name, tmp_file_size) + # Insert files into sub container - self.insert_files_and_verify("cont0") + self.log_step("Insert files into the sub container") + self.insert_files_and_verify( + dfuse_hosts, os.path.join(dfuse.mount_dir.value, "cont0"), tmp_file_count, + tmp_file_name, tmp_file_size) + # Create 100 sub containers and verify the temp files - self.verify_create_delete_containers(pool, 100) - self.verify_multi_pool_containers() + self.verify_create_delete_containers( + pool, device, 100, dfuse_hosts, dfuse.mount_dir.value, tmp_file_count, tmp_file_size, + tmp_file_size) + self.verify_multi_pool_containers( + pool_count, cont_count, dfuse_hosts, dfuse.mount_dir.value, tmp_file_count, + tmp_file_name, tmp_file_size) - def verify_multi_pool_containers(self): + self.log.info("Test Passed") + + def verify_multi_pool_containers(self, pool_count, cont_count, hosts, mount_dir, tmp_file_count, + tmp_file_name, tmp_file_size): """Verify multiple pools and containers. Create several pools and containers and mount it under the root container and verify they're accessible. + + Args: + pool_count (int): number of pools to create + cont_count (int): number of containers to create in each pool + hosts (NodeSet): Hosts on which to run the commands + mount_dir (str): dfuse mount directory + tmp_file_count (int): number of temporary files + tmp_file_name (str): base name for temporary files + tmp_file_size (int): size of temporary files """ - pool_count = self.params.get("pool_count", "/run/pool/*") + self.log_step( + f"Create {pool_count} pools with {cont_count} containers mounted under the root " + "container and insert files into each new container") for idx in range(pool_count): - pool = self._create_pool() - for jdx in range(self.cont_count): - cont_name = "/cont_{}{}".format(idx, jdx) - sub_cont = str(self.dfuse.mount_dir.value + cont_name) - self._create_cont(pool=pool, path=sub_cont) - self.insert_files_and_verify(cont_name) - - def verify_create_delete_containers(self, pool, cont_count): + pool = self.get_pool(connect=False) + for jdx in range(cont_count): + sub_container = str(mount_dir + f"/cont_{idx}{jdx}") + self.get_container(pool=pool, path=sub_container) + self.insert_files_and_verify( + hosts, sub_container, tmp_file_count, tmp_file_name, tmp_file_size) + + def verify_create_delete_containers(self, pool, device, cont_count, hosts, mount_dir, + tmp_file_count, tmp_file_name, tmp_file_size): """Verify multiple pools and containers creation and deletion. Create multiple containers and multiple multi-mb files in each of @@ -117,76 +116,87 @@ def verify_create_delete_containers(self, pool, cont_count): Destroy half of the containers and verify the space usage is reclaimed. Args: + pool (TestPool): pool in which to create the containers + device (str): device where the pools and containers are created cont_count (int): Number of containers to be created. + hosts (NodeSet): Hosts on which to run the commands + tmp_file_count (int): number of temporary files + tmp_file_name (str): base name for temporary files + tmp_file_size (int): size of temporary files """ self.log.info("Verifying multiple container create delete") - pool_space_before = pool.get_pool_free_space(self.device) + self.log_step(f"Create {cont_count} new sub containers and insert files") + pool_space_before = pool.get_pool_free_space(device) self.log.info("Pool space before = %s", pool_space_before) + containers = [] for idx in range(cont_count): - sub_cont = str(self.dfuse.mount_dir.value + "/cont{}".format(idx + 1)) - self._create_cont(pool, path=sub_cont) - self.insert_files_and_verify("cont{}".format(idx + 1)) - expected = pool_space_before - \ - cont_count * self.tmp_file_count * self.tmp_file_size - pool_space_after = pool.get_pool_free_space(self.device) + sub_cont = str(mount_dir + f"/cont{idx + 1}") + containers.append(self.get_container(pool, path=sub_cont)) + self.insert_files_and_verify( + hosts, os.path.join(mount_dir, f"cont{idx + 1}"), tmp_file_count, tmp_file_name, + tmp_file_size) + + expected = pool_space_before - cont_count * tmp_file_count * tmp_file_size + self.log_step( + "Verify the pool free space <= {expected} after creating {cont_count} containers") + pool_space_after = pool.get_pool_free_space(device) self.log.info("Pool space <= Expected") self.log.info("%s <= %s", pool_space_after, expected) self.assertTrue(pool_space_after <= expected) - self.log.info("Destroying half of the containers = %s", cont_count // 2) + + self.log_step(f"Destroy half of the {cont_count} new sub containers ({cont_count // 2})") for _ in range(cont_count // 2): - self.container[-1].destroy(1) - self.container.pop() - expected = pool_space_after + \ - ((cont_count // 2) * self.tmp_file_count * self.tmp_file_size) - pool_space_after_cont_destroy = \ - pool.get_pool_free_space(self.device) + containers[-1].destroy(1) + containers.pop() + + expected = pool_space_after + ((cont_count // 2) * tmp_file_count * tmp_file_size) + self.log_step( + "Verify the pool free space >= {expected} after destroying half of the containers") + pool_space_after_cont_destroy = pool.get_pool_free_space(device) self.log.info("After container destroy") self.log.info("Free Pool space >= Expected") self.log.info("%s >= %s", pool_space_after_cont_destroy, expected) self.assertTrue(pool_space_after_cont_destroy >= expected) - def insert_files_and_verify(self, container_name): + def insert_files_and_verify(self, hosts, cont_dir, tmp_file_count, tmp_file_name, + tmp_file_size): """Verify inserting files into a specific container. Insert files into the specific container and verify they're navigable and accessible. Args: - container_name: Name of the POSIX Container - file_name_prefix: Prefix of the file name that will be created - no_of_files: Number of files to be created iteratively + hosts (NodeSet): Hosts on which to run the commands + cont_dir (str): container directory + tmp_file_count (int): number of temporary files + tmp_file_name (str): base name for temporary files + tmp_file_size (int): size of temporary files """ - cont_dir = self.dfuse.mount_dir.value - if container_name: - cont_dir = "{}/{}".format(cont_dir, container_name) - cmds = [] ls_cmds = [] - for idx in range(self.tmp_file_count): + for idx in range(tmp_file_count): # Create 40 MB files - file_name = "{}{}".format(self.tmp_file_name, idx + 1) - cmd = "head -c {} /dev/urandom > {}/{}".format( - self.tmp_file_size, cont_dir, file_name) - ls_cmds.append("ls {}".format(file_name)) + file_name = f"{tmp_file_name}{idx + 1}" + cmd = f"head -c {tmp_file_size} /dev/urandom > {cont_dir}/{file_name}" + ls_cmds.append(f"ls {file_name}") cmds.append(cmd) - self._execute_cmd(";".join(cmds)) + self._execute_cmd(";".join(cmds), hosts) cmds = [] # Run ls to verify the temp files are actually created - cmds = ["cd {}".format(cont_dir)] + cmds = [f"cd {cont_dir}"] cmds.extend(ls_cmds) - self._execute_cmd(";".join(cmds)) + self._execute_cmd(";".join(cmds), hosts) - def _execute_cmd(self, cmd): + def _execute_cmd(self, cmd, hosts): """Execute command on the host clients. Args: cmd (str): Command to run + hosts (NodeSet): hosts on which to run the command """ - result = run_remote(self.log, self.dfuse_hosts, cmd, timeout=30) + result = run_remote(self.log, hosts, cmd, timeout=30) if not result.passed: - self.log.error( - "Error running '%s' on the following hosts: %s", cmd, result.failed_hosts) - self.fail("Test was expected to pass but it failed.\n") + self.fail(f"Error running '{cmd}' on {str(result.failed_hosts)}") diff --git a/src/tests/ftest/dfuse/simul.py b/src/tests/ftest/dfuse/simul.py index aef80b5d651..8d2fd9a6700 100644 --- a/src/tests/ftest/dfuse/simul.py +++ b/src/tests/ftest/dfuse/simul.py @@ -6,14 +6,14 @@ import os -from avocado import fail_on -from dfuse_test_base import DfuseTestBase +from apricot import TestWithServers +from dfuse_utils import get_dfuse, start_dfuse from env_modules import load_mpi from exception_utils import MPILoadError -from general_utils import DaosTestError, run_command +from general_utils import run_command -class PosixSimul(DfuseTestBase): +class PosixSimul(TestWithServers): """Tests a posix container with simul. From : https://github.com/LLNL/simul @@ -69,7 +69,6 @@ class PosixSimul(DfuseTestBase): :avocado: recursive """ - @fail_on(DaosTestError) def run_simul(self, include=None, exclude=None, raise_exception=True): """Run simul. @@ -91,39 +90,34 @@ def run_simul(self, include=None, exclude=None, raise_exception=True): simul_path = self.params.get("simul_path", "/run/*", "") # Create a pool - self.log.info("Create a pool") - self.add_pool() + self.log_step("Create a pool") + pool = self.get_pool() # Create a container - self.log.info("Create container") - self.add_container(self.pool) + self.log_step("Create container") + container = self.get_container(pool) # Setup dfuse - dfuse_hosts = self.agent_managers[0].hosts - dfuse_mount_dir = self.params.get("mount_dir", '/run/dfuse/*') - self.start_dfuse(dfuse_hosts, self.pool, self.container) - self.dfuse.check_running() + self.log_step("Start dfuse") + dfuse = get_dfuse(self, self.agent_managers[0].hosts) + start_dfuse(self, dfuse, pool, container) + dfuse.check_running() # The use of MPI here is to run in parallel all simul tests on a single host. if not load_mpi(mpi_type): raise MPILoadError(mpi_type) # Run simul - sumil_cmd = os.path.join(simul_path, "simul") + simul_cmd = os.path.join(simul_path, "simul") if include and not exclude: - cmd = "{0} -vv -d {1} -i {2}".format(sumil_cmd, dfuse_mount_dir, include) + cmd = "{0} -vv -d {1} -i {2}".format(simul_cmd, dfuse.mount_dir.value, include) elif exclude and not include: - cmd = "{0} -vv -d {1} -e {2}".format(sumil_cmd, dfuse_mount_dir, exclude) + cmd = "{0} -vv -d {1} -e {2}".format(simul_cmd, dfuse.mount_dir.value, exclude) else: self.fail("##Both include and exclude tests are selected both or empty.") - self.log.info("Running simul on %s", mpi_type) - try: - result = run_command(cmd, output_check="combined", raise_exception=raise_exception) - finally: - self.stop_dfuse() - - return result + self.log_step("Running simul on %s", mpi_type) + return run_command(cmd, output_check="combined", raise_exception=raise_exception) def test_posix_simul(self): """Test simul. @@ -134,6 +128,7 @@ def test_posix_simul(self): :avocado: tags=PosixSimul,test_posix_simul """ self.run_simul(exclude="9,18,30,39,40") + self.log.info('Test passed') def test_posix_expected_failures(self): """Test simul, expected failures. @@ -154,4 +149,5 @@ def test_posix_expected_failures(self): self.log.info("Test %s was expected to fail, but passed", test) failed.append(test) if failed: - self.fail("Simul tests {} expected to failed, but passed".format(", ".join(failed))) + self.fail(f"Simul tests {', '.join(failed)} expected to failed, but passed") + self.log.info('Test passed') diff --git a/src/tests/ftest/dfuse/sparse_file.py b/src/tests/ftest/dfuse/sparse_file.py index 6b8521614b1..ef31c3816e6 100644 --- a/src/tests/ftest/dfuse/sparse_file.py +++ b/src/tests/ftest/dfuse/sparse_file.py @@ -8,6 +8,7 @@ from getpass import getuser import paramiko +from dfuse_utils import get_dfuse, start_dfuse from general_utils import get_remote_file_size from ior_test_base import IorTestBase @@ -50,14 +51,15 @@ def test_sparsefile(self): # Create a pool, container and start dfuse. self.create_pool() self.create_cont() - self.start_dfuse(self.hostlist_clients, self.pool, self.container) + dfuse = get_dfuse(self, self.hostlist_clients) + start_dfuse(self, dfuse, self.pool, self.container) # get scm space before write self.space_before = self.pool.get_pool_free_space("nvme") # create large file and perform write to it so that if goes out of # space. - sparse_file = os.path.join(self.dfuse.mount_dir.value, 'sparsefile.txt') + sparse_file = os.path.join(dfuse.mount_dir.value, 'sparsefile.txt') self.execute_cmd("touch {}".format(sparse_file)) self.log.info("File size (in bytes) before truncate: %s", get_remote_file_size(self.hostlist_clients[0], sparse_file)) diff --git a/src/tests/ftest/erasurecode/rebuild_fio.py b/src/tests/ftest/erasurecode/rebuild_fio.py index ffd2d90aa1c..b68d058d927 100644 --- a/src/tests/ftest/erasurecode/rebuild_fio.py +++ b/src/tests/ftest/erasurecode/rebuild_fio.py @@ -1,28 +1,23 @@ ''' - (C) Copyright 2019-2023 Intel Corporation. + (C) Copyright 2019-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent ''' +import queue +import threading import time -from ec_utils import ErasureCodeFio +from dfuse_utils import get_dfuse, start_dfuse +from fio_test_base import FioBase -class EcodFioRebuild(ErasureCodeFio): - # pylint: disable=protected-access +class EcodFioRebuild(FioBase): """Test class Description: Runs Fio with EC object type over POSIX and verify on-line, off-line for rebuild and verify the data. :avocado: recursive """ - def __init__(self, *args, **kwargs): - """Initialize a EcodFioRebuild object.""" - super().__init__(*args, **kwargs) - self.set_online_rebuild = False - self.rank_to_kill = None - self.read_option = self.params.get("rw_read", "/run/fio/test/read_write/*") - def execution(self, rebuild_mode): """Execute test. @@ -31,29 +26,42 @@ def execution(self, rebuild_mode): """ aggregation_threshold = self.params.get("threshold", "/run/pool/aggregation/*") aggregation_timeout = self.params.get("aggr_timeout", "/run/pool/aggregation/*") + read_option = self.params.get("rw_read", "/run/fio/test/read_write/*") + + engine_count = self.server_managers[0].get_config_value("engines_per_host") + server_count = len(self.hostlist_servers) * engine_count + rank_to_kill = server_count - 1 + # 1. Disable aggregation self.log_step("Disable aggregation") - self.pool.disable_aggregation() + pool = self.get_pool() + pool.disable_aggregation() - # 2.a Kill last server rank first - self.log_step("Start fio and kill the last server") - self.rank_to_kill = self.server_count - 1 + # Start dfuse + self.log_step('Starting dfuse') + container = self.get_container(pool) + container.set_attr(attrs={'dfuse-direct-io-disable': 'on'}) + dfuse = get_dfuse(self, self.hostlist_clients) + start_dfuse(self, dfuse, pool, container) + + # Write the Fio data and kill the last server rank if rebuild_mode is on-line if 'on-line' in rebuild_mode: - # Enabled on-line rebuild for the test - self.set_online_rebuild = True - # 2.b Write the Fio data and kill server if rebuild_mode is on-line - self.start_online_fio() + self.log_step(f"Start fio and stop the last server rank ({rank_to_kill})") + self.start_online_fio(dfuse.mount_dir.value, rank_to_kill) + else: + self.log_step("Start fio and leave all servers running") + self.start_online_fio(dfuse.mount_dir.value, None) - # 3. Get initial total free space (scm+nvme) + # Get initial total free space (scm+nvme) self.log_step("Get initial total free space (scm+nvme)") - init_free_space = self.pool.get_total_free_space(refresh=True) + init_free_space = pool.get_total_free_space(refresh=True) - # 4. Enable aggregation + # Enable aggregation self.log_step("Enable aggregation") - self.pool.enable_aggregation() + pool.enable_aggregation() - # 5. Get total space consumed (scm+nvme) after aggregation enabled, verify and wait until - # aggregation triggered, maximum 3 minutes. + # Get total space consumed (scm+nvme) after aggregation enabled, verify and wait until + # aggregation triggered, maximum 3 minutes. self.log_step("Verify the Fio write finish without any error") start_time = time.time() timed_out = False @@ -61,7 +69,7 @@ def execution(self, rebuild_mode): self.log_step("Verify and wait until aggregation triggered") while not aggr_triggered and not timed_out: # Check if current free space exceeds threshold - free_space = self.pool.get_total_free_space(refresh=True) + free_space = pool.get_total_free_space(refresh=True) difference = free_space - init_free_space aggr_triggered = difference >= aggregation_threshold self.log.debug("Total Free space: initial=%s, current=%s, difference=%s", @@ -72,36 +80,86 @@ def execution(self, rebuild_mode): if not aggr_triggered and not timed_out: time.sleep(1) if timed_out: - self.fail("Aggregation not observed within {} seconds".format(aggregation_timeout)) + self.fail(f"Aggregation not observed within {aggregation_timeout} seconds") # ec off-line rebuild fio if 'off-line' in rebuild_mode: - self.log_step("Stop rank for ec off-line rebuild fio") - self.server_managers[0].stop_ranks( - [self.server_count - 1], self.d_log, force=True) + self.log_step(f"Stop the last server rank ({rank_to_kill}) for ec off-line rebuild fio") + self.server_managers[0].stop_ranks([rank_to_kill], self.d_log, force=True) - # 6. Adding unlink option for final read command + # Adding unlink option for final read command self.log_step("Adding unlink option for final read command") - if int(self.container.properties.value.split(":")[1]) == 1: - self.fio_cmd._jobs['test'].unlink.value = 1 + if int(container.properties.value.split(":")[1]) == 1: + self.fio_cmd._jobs['test'].unlink.value = 1 # pylint: disable=protected-access - # 7. Read and verify the original data. + # Read and verify the original data. self.log_step("Read and verify the original data.") - self.fio_cmd._jobs['test'].rw.value = self.read_option + self.fio_cmd._jobs['test'].rw.value = read_option # pylint: disable=protected-access self.fio_cmd.run() - # 8. If RF is 2 kill one more server and validate the data is not corrupted. - self.log_step("If RF is 2 kill one more server and validate the data is not corrupted.") - if int(self.container.properties.value.split(":")[1]) == 2: - self.fio_cmd._jobs['test'].unlink.value = 1 - self.log.info("RF is 2,So kill another server and verify data") + # If RF is 2 kill one more server and validate the data is not corrupted. + if int(container.properties.value.split(":")[1]) == 2: # Kill one more server rank - self.server_managers[0].stop_ranks([self.server_count - 2], self.d_log, force=True) + rank_to_kill = server_count - 2 + self.log_step(f"Kill one more server rank {rank_to_kill} when RF=2") + self.fio_cmd._jobs['test'].unlink.value = 1 # pylint: disable=protected-access + self.server_managers[0].stop_ranks([rank_to_kill], self.d_log, force=True) + # Read and verify the original data. + self.log_step(f"Verify the data is not corrupted after stopping rank {rank_to_kill}.") self.fio_cmd.run() # Pre-teardown: make sure rebuild is done before too-quickly trying to destroy container. - self.pool.wait_for_rebuild_to_end() + pool.wait_for_rebuild_to_end() + + self.log.info("Test passed") + + def start_online_fio(self, directory, rank_to_kill=None): + """Run Fio operation with thread in background. + + Trigger the server failure while Fio is running + + Args: + directory (str): directory to use with the fio command + rank_to_kill (int, optional): the server rank to kill while IO operation is in progress. + Set to None to leave all servers running during IO. Defaults to None. + """ + results_queue = queue.Queue() + + # Create the Fio run thread + job = threading.Thread( + target=self.write_single_fio_dataset, + kwargs={"directory": directory, "results": results_queue}) + + # Launch the Fio thread + job.start() + + # Kill the server rank while IO operation in progress + if rank_to_kill is not None: + time.sleep(30) + self.server_managers[0].stop_ranks([rank_to_kill], self.d_log, force=True) + + # Wait to finish the thread + job.join() + + # Verify the queue result and make sure test has no failure + while not results_queue.empty(): + if results_queue.get() == "FAIL": + self.fail("Error running fio as a thread") + + def write_single_fio_dataset(self, directory, results): + """Run Fio Benchmark. + + Args: + directory (str): directory to use with the fio command + results (queue): queue for returning thread results + """ + try: + self.fio_cmd.update_directory(directory) + self.execute_fio() + results.put("PASS") + except Exception: # pylint: disable=broad-except + results.put("FAIL") def test_ec_online_rebuild_fio(self): """Jira ID: DAOS-7320. diff --git a/src/tests/ftest/erasurecode/truncate.py b/src/tests/ftest/erasurecode/truncate.py index dc8ff91f67a..bb570755bc8 100644 --- a/src/tests/ftest/erasurecode/truncate.py +++ b/src/tests/ftest/erasurecode/truncate.py @@ -1,10 +1,11 @@ ''' - (C) Copyright 2019-2023 Intel Corporation. + (C) Copyright 2019-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent ''' import os +from dfuse_utils import get_dfuse, start_dfuse from fio_test_base import FioBase from general_utils import get_remote_file_size, run_pcmd @@ -40,11 +41,16 @@ def test_ec_truncate(self): fname = self.params.get("names", '/run/fio/*') # Write the file using Fio - self.execute_fio(stop_dfuse=False) + pool = self.get_pool(connect=False) + container = self.get_container(pool) + container.set_attr(attrs={'dfuse-direct-io-disable': 'on'}) + dfuse = get_dfuse(self, self.hostlist_clients) + start_dfuse(self, dfuse, pool, container) + self.fio_cmd.update_directory(dfuse.mount_dir.value) + self.execute_fio() # Get the fuse file name. - testfile = "{}.0.0".format(os.path.join(self.dfuse.mount_dir.value, - fname[0])) + testfile = "{}.0.0".format(os.path.join(dfuse.mount_dir.value, fname[0])) original_fs = int(self.fio_cmd._jobs['test'].size.value) # Read and verify the original data. @@ -56,8 +62,8 @@ def test_ec_truncate(self): self.assertEqual(original_fs, file_size) # Truncate the original file which will extend the size of file. - result = run_pcmd(self.hostlist_clients, "truncate -s {} {}" - .format(truncate_size, testfile)) + result = run_pcmd( + self.hostlist_clients, "truncate -s {} {}".format(truncate_size, testfile)) if result[0]["exit_status"] == 1: self.fail("Failed to truncate file {}".format(testfile)) @@ -69,8 +75,8 @@ def test_ec_truncate(self): self.fio_cmd.run() # Truncate the original file and shrink to original size. - result = run_pcmd(self.hostlist_clients, "truncate -s {} {}" - .format(original_fs, testfile)) + result = run_pcmd( + self.hostlist_clients, "truncate -s {} {}".format(original_fs, testfile)) if result[0]["exit_status"] == 1: self.fail("Failed to truncate file {}".format(testfile)) diff --git a/src/tests/ftest/fault_injection/ec.py b/src/tests/ftest/fault_injection/ec.py index 873ddd1741c..f4bcbc37a38 100644 --- a/src/tests/ftest/fault_injection/ec.py +++ b/src/tests/ftest/fault_injection/ec.py @@ -3,6 +3,7 @@ SPDX-License-Identifier: BSD-2-Clause-Patent ''' +from dfuse_utils import get_dfuse, start_dfuse from fio_test_base import FioBase from ior_test_base import IorTestBase @@ -51,4 +52,10 @@ def test_ec_fio_fault(self): :avocado: tags=ec,ec_array,ec_fio_fault,faults,fio :avocado: tags=EcodFaultInjection,test_ec_fio_fault """ + pool = self.get_pool(connect=False) + container = self.get_container(pool) + container.set_attr(attrs={'dfuse-direct-io-disable': 'on'}) + dfuse = get_dfuse(self, self.hostlist_clients) + start_dfuse(self, dfuse, pool, container) + self.fio_cmd.update_directory(dfuse.mount_dir.value) self.execute_fio() diff --git a/src/tests/ftest/io/macsio_test.py b/src/tests/ftest/io/macsio_test.py index 01daf179712..14b7595381f 100644 --- a/src/tests/ftest/io/macsio_test.py +++ b/src/tests/ftest/io/macsio_test.py @@ -3,12 +3,12 @@ SPDX-License-Identifier: BSD-2-Clause-Patent """ -from dfuse_test_base import DfuseTestBase +from dfuse_utils import get_dfuse, start_dfuse from general_utils import list_to_str from macsio_test_base import MacsioTestBase -class MacsioTest(DfuseTestBase, MacsioTestBase): +class MacsioTest(MacsioTestBase): """Test class Description: Runs a basic MACSio test. :avocado: recursive @@ -33,17 +33,18 @@ def test_macsio(self): processes = self.params.get("processes", "/run/macsio/*", len(self.hostlist_clients)) # Create a pool - self.add_pool() - self.pool.display_pool_daos_space() + self.log_step('Create a single pool') + pool = self.get_pool() + pool.display_pool_daos_space() # Create a container - self.add_container(self.pool) + self.log_step('Create a single container') + container = self.get_container(pool) # Run macsio - self.log.info("Running MACSio") + self.log_step("Running MACSio") status = self.macsio.check_results( - self.run_macsio( - self.pool.uuid, list_to_str(self.pool.svc_ranks), processes, self.container.uuid), + self.run_macsio(pool.uuid, list_to_str(pool.svc_ranks), processes, container.uuid), self.hostlist_clients) if status: self.log.info("Test passed") @@ -68,26 +69,29 @@ def test_macsio_daos_vol(self): processes = self.params.get("processes", "/run/macsio/*", len(self.hostlist_clients)) # Create a pool - self.add_pool() - self.pool.display_pool_daos_space() + self.log_step('Create a single pool') + pool = self.get_pool() + pool.display_pool_daos_space() # Create a container - self.add_container(self.pool) + self.log_step('Create a single container') + container = self.get_container(pool) # Create dfuse mount point - self.start_dfuse(self.hostlist_clients, self.pool, self.container) + self.log_step('Starting dfuse') + dfuse = get_dfuse(self, self.hostlist_clients) + start_dfuse(self, dfuse, pool, container) # VOL needs to run from a file system that supports xattr. Currently # nfs does not have this attribute so it was recommended to create and # use a dfuse dir and run vol tests from there. - self.job_manager.working_dir.value = self.dfuse.mount_dir.value + self.job_manager.working_dir.value = dfuse.mount_dir.value # Run macsio - self.log.info("Running MACSio with DAOS VOL connector") + self.log_step("Running MACSio with DAOS VOL connector") status = self.macsio.check_results( self.run_macsio( - self.pool.uuid, list_to_str(self.pool.svc_ranks), processes, self.container.uuid, - plugin_path), + pool.uuid, list_to_str(pool.svc_ranks), processes, container.uuid, plugin_path), self.hostlist_clients) if status: self.log.info("Test passed") diff --git a/src/tests/ftest/io/parallel_io.py b/src/tests/ftest/io/parallel_io.py index b181da47c6c..7be497dd28f 100644 --- a/src/tests/ftest/io/parallel_io.py +++ b/src/tests/ftest/io/parallel_io.py @@ -10,6 +10,7 @@ import time from getpass import getuser +from dfuse_utils import get_dfuse, start_dfuse from exception_utils import CommandFailure from fio_test_base import FioBase from ior_test_base import IorTestBase @@ -25,7 +26,6 @@ class ParallelIo(FioBase, IorTestBase): def __init__(self, *args, **kwargs): """Initialize a ParallelIo object.""" super().__init__(*args, **kwargs) - self.dfuse = None self.cont_count = None self.pool_count = None self.statvfs_info_initial = None @@ -76,7 +76,7 @@ def statvfs_pool(self, path): return statvfs_list - def verify_aggregation(self, reduced_space, count): + def verify_aggregation(self, dfuse, reduced_space, count): """Verify aggregation. Verify if expected space is returned for each pool after containers @@ -84,6 +84,7 @@ def verify_aggregation(self, reduced_space, count): otherwise exit the test with a failure. Args: + dfuse (Dfuse): the dfuse object reduced_space (int): expected space to be returned count (int): aggregation index """ @@ -101,8 +102,7 @@ def verify_aggregation(self, reduced_space, count): self.statvfs_after_cont_destroy) self.fail("Aggregation did not complete as expected") time.sleep(60) - self.statvfs_after_cont_destroy = self.statvfs_pool( - self.dfuse.mount_dir.value) + self.statvfs_after_cont_destroy = self.statvfs_pool(dfuse.mount_dir.value) counter += 1 def test_parallelio(self): @@ -133,14 +133,16 @@ def test_parallelio(self): # Create a pool and start dfuse. self.create_pool() - self.start_dfuse(self.hostlist_clients, self.pool[0], None) + dfuse = get_dfuse(self, self.hostlist_clients) + start_dfuse(self, dfuse, self.pool[0]) + # create multiple containers self.add_container_qty(self.cont_count, self.pool[0]) # check if all the created containers can be accessed and perform # io on each container using fio in parallel for _, cont in enumerate(self.container): - dfuse_cont_dir = self.dfuse.mount_dir.value + "/" + cont.uuid + dfuse_cont_dir = dfuse.mount_dir.value + "/" + cont.uuid cmd = "ls -a {}".format(dfuse_cont_dir) # execute bash cmds result = run_remote(self.log, self.hostlist_clients, cmd, timeout=30) @@ -148,8 +150,8 @@ def test_parallelio(self): self.fail("Error running '{}' on the following hosts: {}".format( cmd, result.failed_hosts)) # run fio on all containers - thread = threading.Thread(target=self.execute_fio, args=( - self.dfuse.mount_dir.value + "/" + cont.uuid, False)) + self.fio_cmd.update_directory(os.path.join(dfuse.mount_dir.value, cont.uuid)) + thread = threading.Thread(target=self.execute_fio) threads.append(thread) thread.start() @@ -162,19 +164,18 @@ def test_parallelio(self): self.container[0].destroy(1) # check dfuse if it is running fine - self.dfuse.check_running() + dfuse.check_running() # try accessing destroyed container, it should fail try: - self.execute_fio( - self.dfuse.mount_dir.value + "/" + container_to_destroy, False) + self.fio_cmd.update_directory(os.path.join(dfuse.mount_dir.value, container_to_destroy)) + self.execute_fio() self.fail( - "Fio was able to access destroyed container: {}".format( - self.container[0].uuid)) + "Fio was able to access destroyed container: {}".format(self.container[0].uuid)) except CommandFailure: self.log.info("fio failed as expected") # check dfuse is still running after attempting to access deleted container - self.dfuse.check_running() + dfuse.check_running() def test_multipool_parallelio(self): """Jira ID: DAOS-3775. @@ -220,10 +221,11 @@ def test_multipool_parallelio(self): pool_job.join() # start dfuse. - self.start_dfuse(self.hostlist_clients, None, None) + dfuse = get_dfuse(self, self.hostlist_clients) + start_dfuse(self, dfuse) # record free space using statvfs before any data is written. - self.statvfs_info_initial = self.statvfs_pool(self.dfuse.mount_dir.value) + self.statvfs_info_initial = self.statvfs_pool(dfuse.mount_dir.value) # Create 10 containers for each pool. Container create process cannot # be parallelized as different container create could complete at @@ -237,7 +239,7 @@ def test_multipool_parallelio(self): # using ior. This process of performing io is done in parallel for # all containers using threads. for pool_count, pool in enumerate(self.pool): - dfuse_pool_dir = str(self.dfuse.mount_dir.value + "/" + pool.uuid) + dfuse_pool_dir = str(dfuse.mount_dir.value + "/" + pool.uuid) for counter in range(self.cont_count): cont_num = (pool_count * self.cont_count) + counter dfuse_cont_dir = str(dfuse_pool_dir + "/" + self.container[cont_num].uuid) @@ -260,8 +262,7 @@ def test_multipool_parallelio(self): job.join() # Record free space after io - self.statvfs_before_cont_destroy = self.statvfs_pool( - self.dfuse.mount_dir.value) + self.statvfs_before_cont_destroy = self.statvfs_pool(dfuse.mount_dir.value) # Destroy half of the containers from each pool pfinal = 0 @@ -279,8 +280,7 @@ def test_multipool_parallelio(self): destroy_job.join() # Record free space after container destroy. - self.statvfs_after_cont_destroy = self.statvfs_pool( - self.dfuse.mount_dir.value) + self.statvfs_after_cont_destroy = self.statvfs_pool(dfuse.mount_dir.value) # Calculate the expected space to be returned after containers # are destroyed. diff --git a/src/tests/ftest/server/replay.py b/src/tests/ftest/server/replay.py index 5b0124065f7..09df22ddf1c 100644 --- a/src/tests/ftest/server/replay.py +++ b/src/tests/ftest/server/replay.py @@ -7,7 +7,7 @@ import time from apricot import TestWithServers -from dfuse_utils import get_dfuse, start_dfuse, stop_dfuse +from dfuse_utils import get_dfuse, start_dfuse from general_utils import join from ior_utils import read_data, write_data from test_utils_pool import add_pool @@ -137,7 +137,7 @@ def test_replay_posix(self): ior = write_data(self, container, dfuse=dfuse) self.log_step('After the read has completed, unmount dfuse') - stop_dfuse(self, dfuse) + dfuse.stop() self.stop_engines() self.restart_engines() diff --git a/src/tests/ftest/util/data_mover_test_base.py b/src/tests/ftest/util/data_mover_test_base.py index bfed9571c32..adf74ca9bdd 100644 --- a/src/tests/ftest/util/data_mover_test_base.py +++ b/src/tests/ftest/util/data_mover_test_base.py @@ -12,6 +12,7 @@ from command_utils_base import BasicParameter, EnvironmentVariables from data_mover_utils import (ContClone, DcpCommand, DdeserializeCommand, DserializeCommand, DsyncCommand, FsCopy, uuid_from_obj) +from dfuse_utils import get_dfuse, start_dfuse from duns_utils import format_path from exception_utils import CommandFailure from general_utils import create_string_buffer, get_log_file @@ -995,8 +996,8 @@ def run_dm_activities_with_ior(self, tool, pool, cont, create_dataset=False): tool (str): specify the tool name to be used pool (TestPool): source pool object cont (TestContainer): source container object - create_dataset (bool): boolean to create initial set of - data using ior. Defaults to False. + create_dataset (bool, optional): boolean to create initial set of data using ior. + Defaults to False. """ # Set the tool to use self.set_tool(tool) @@ -1021,7 +1022,8 @@ def run_dm_activities_with_ior(self, tool, pool, cont, create_dataset=False): pool2 = self.get_pool() # Use dfuse as a shared intermediate for serialize + deserialize dfuse_cont = self.get_container(pool, oclass=self.ior_cmd.dfs_oclass.value) - self.start_dfuse(self.dfuse_hosts, pool, dfuse_cont) + self.dfuse = get_dfuse(self, self.dfuse_hosts) + start_dfuse(self, self.dfuse, pool, dfuse_cont) self.serial_tmp_dir = self.dfuse.mount_dir.value # Serialize/Deserialize container 1 to a new cont2 in pool2 diff --git a/src/tests/ftest/util/dfuse_test_base.py b/src/tests/ftest/util/dfuse_test_base.py deleted file mode 100644 index df1f6c8d721..00000000000 --- a/src/tests/ftest/util/dfuse_test_base.py +++ /dev/null @@ -1,70 +0,0 @@ -""" -(C) Copyright 2020-2022 Intel Corporation. - -SPDX-License-Identifier: BSD-2-Clause-Patent -""" -from agent_utils import include_local_host -from apricot import TestWithServers -from dfuse_utils import get_dfuse, start_dfuse -from exception_utils import CommandFailure - - -class DfuseTestBase(TestWithServers): - """Runs Dfuse test suites. - - :avocado: recursive - """ - - def __init__(self, *args, **kwargs): - """Initialize a TestWithServers object.""" - super().__init__(*args, **kwargs) - self.dfuse = None - - def setUp(self): - """Set up the test case.""" - super().setUp() - # using localhost as client if client list is empty - if not self.hostlist_clients: - self.hostlist_clients = include_local_host(None) - - def stop_job_managers(self): - """Stop the test job manager followed by dfuse. - - Returns: - list: a list of exceptions raised stopping the agents - - """ - error_list = super().stop_job_managers() - try: - self.stop_dfuse() - except CommandFailure as error: - error_list.append("Error stopping dfuse: {}".format(error)) - return error_list - - def load_dfuse(self, hosts, namespace=None): - """Create a DfuseCommand object - - Args: - hosts (NodeSet): hosts on which to start Dfuse - namespace (str, optional): dfuse namespace. Defaults to None - """ - self.dfuse = get_dfuse(self, hosts, namespace) - - def start_dfuse(self, hosts, pool=None, container=None, **params): - """Create a DfuseCommand object and use it to start Dfuse. - - Args: - hosts (NodeSet): hosts on which to start Dfuse - pool (TestPool, optional): pool to mount. Defaults to None - container (TestContainer, optional): container to mount. Defaults to None - params (Object, optional): Dfuse command arguments to update - """ - if self.dfuse is None: - self.load_dfuse(hosts) - start_dfuse(self, self.dfuse, pool=pool, container=container, **params) - - def stop_dfuse(self): - """Stop Dfuse and unset the DfuseCommand object.""" - if self.dfuse: - self.dfuse.stop() - self.dfuse = None diff --git a/src/tests/ftest/util/ec_utils.py b/src/tests/ftest/util/ec_utils.py index 7b49629f913..0d7c1414df8 100644 --- a/src/tests/ftest/util/ec_utils.py +++ b/src/tests/ftest/util/ec_utils.py @@ -11,8 +11,7 @@ from apricot import TestWithServers from daos_utils import DaosCommand from exception_utils import CommandFailure -from fio_test_base import FioBase -from general_utils import DaosTestError, run_pcmd +from general_utils import DaosTestError from mdtest_test_base import MdtestBase from nvme_utils import ServerFillUp from pydaos.raw import DaosApiError @@ -462,96 +461,3 @@ def start_online_mdtest(self): while not self.out_queue.empty(): if self.out_queue.get() == "Mdtest Failed": self.fail("FAIL") - - -class ErasureCodeFio(FioBase): - """Class to use for EC testing with Fio Benchmark.""" - - def __init__(self, *args, **kwargs): - """Initialize a FioBase object.""" - super().__init__(*args, **kwargs) - self.server_count = None - self.set_online_rebuild = False - self.rank_to_kill = None - - def setUp(self): - """Set up each test case.""" - super().setUp() - engine_count = self.server_managers[0].get_config_value("engines_per_host") - self.server_count = len(self.hostlist_servers) * engine_count - - # Create Pool - self.add_pool() - self.out_queue = queue.Queue() - - def stop_job_managers(self): - """Cleanup dfuse in case of test failure.""" - error_list = [] - dfuse_cleanup_cmd = ["pkill dfuse --signal KILL", - "fusermount3 -uz {}".format(self.dfuse.mount_dir.value)] - - for cmd in dfuse_cleanup_cmd: - results = run_pcmd(self.hostlist_clients, cmd) - for result in results: - if result["exit_status"] != 0: - error_list.append("Errors detected during cleanup cmd %s on node %s", - cmd, str(result["hosts"])) - error_list.extend(super().stop_job_managers()) - return error_list - - def write_single_fio_dataset(self, results): - """Run Fio Benchmark. - - Args: - results (queue): queue for returning thread results - """ - try: - self.execute_fio(stop_dfuse=False) - if results is not None: - results.put("PASS") - except (CommandFailure, DaosApiError, DaosTestError): - if results is not None: - results.put("FAIL") - raise - - def start_online_fio(self): - """Run Fio operation with thread in background. - - Trigger the server failure while Fio is running - """ - # Create the Fio run thread - job = threading.Thread(target=self.write_single_fio_dataset, - kwargs={"results": self.out_queue}) - - # Launch the Fio thread - job.start() - - # Kill the server rank while IO operation in progress - if self.set_online_rebuild: - time.sleep(30) - # Kill the server rank - if self.rank_to_kill is not None: - self.server_managers[0].stop_ranks([self.rank_to_kill], - self.d_log, - force=True) - - # Wait to finish the thread - job.join() - - # Verify the queue result and make sure test has no failure - while not self.out_queue.empty(): - if self.out_queue.get() == "FAIL": - self.fail("FAIL") - - def check_aggregation_status(self, quick_check=True, attempt=20): - """EC Aggregation triggered status. - - Args: - quick_check (bool): Return immediately when Aggregation starts for any storage type. - attempt (int): Number of attempts to do pool query at interval of 5 seconds. - default is 20 attempts. - - Returns: - dict: Storage Aggregation stats SCM/NVMe True/False. - """ - return check_aggregation_status(self.log, self.pool, quick_check, attempt) diff --git a/src/tests/ftest/util/fio_test_base.py b/src/tests/ftest/util/fio_test_base.py index 4044d71c847..93c802718c3 100644 --- a/src/tests/ftest/util/fio_test_base.py +++ b/src/tests/ftest/util/fio_test_base.py @@ -1,13 +1,13 @@ """ - (C) Copyright 2020-2023 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent """ -from dfuse_test_base import DfuseTestBase +from apricot import TestWithServers from fio_utils import FioCommand -class FioBase(DfuseTestBase): +class FioBase(TestWithServers): """Base fio class. :avocado: recursive @@ -34,37 +34,7 @@ def setUp(self): self.processes = self.params.get("np", '/run/fio/client_processes/*') self.manager = self.params.get("manager", '/run/fio/*', "MPICH") - def execute_fio(self, directory=None, stop_dfuse=True): - """Runner method for Fio. - - Args: - directory (str): path for fio run dir - stop_dfuse (bool): Flag to stop or not stop dfuse as part of this method. - """ - # Create a pool if one does not already exist - if self.pool is None: - self.add_pool(connect=False) - - # start dfuse if api is POSIX - if self.fio_cmd.api.value == "POSIX": - if directory: - self.fio_cmd.update( - "global", "directory", directory, - "fio --name=global --directory") - else: - self.add_container(self.pool) - - # Instruct dfuse to disable direct-io for this container - self.container.set_attr(attrs={'dfuse-direct-io-disable': 'on'}) - - self.start_dfuse(self.hostlist_clients, self.pool, self.container) - self.fio_cmd.update( - "global", "directory", self.dfuse.mount_dir.value, - "fio --name=global --directory") - - # Run Fio + def execute_fio(self): + """Runner method for Fio.""" self.fio_cmd.hosts = self.hostlist_clients self.fio_cmd.run() - - if stop_dfuse: - self.stop_dfuse() diff --git a/src/tests/ftest/util/fio_utils.py b/src/tests/ftest/util/fio_utils.py index 1a511257afd..983f2255841 100644 --- a/src/tests/ftest/util/fio_utils.py +++ b/src/tests/ftest/util/fio_utils.py @@ -135,6 +135,14 @@ def update(self, job_name, param_name, value, description=None): else: self.log.error("Invalid job name: %s", job_name) + def update_directory(self, directory): + """Helper method for setting Fio directory command line option. + + Args: + directory (str): fio directory argument value + """ + self.update("global", "directory", directory, "fio --name=global --directory") + @property def command_with_params(self): """Get the command with all of its defined parameters as a string. diff --git a/src/tests/ftest/util/ior_test_base.py b/src/tests/ftest/util/ior_test_base.py index 0e1d1bdbf8d..8290dd254ed 100644 --- a/src/tests/ftest/util/ior_test_base.py +++ b/src/tests/ftest/util/ior_test_base.py @@ -1,19 +1,21 @@ """ -(C) Copyright 2018-2023 Intel Corporation. +(C) Copyright 2018-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent """ import os +from apricot import TestWithServers from ClusterShell.NodeSet import NodeSet -from dfuse_test_base import DfuseTestBase +from dfuse_utils import get_dfuse, start_dfuse from exception_utils import CommandFailure from general_utils import get_random_string, pcmd +from host_utils import get_local_host from ior_utils import IorCommand from job_manager_utils import get_job_manager -class IorTestBase(DfuseTestBase): +class IorTestBase(TestWithServers): """Base IOR test class. :avocado: recursive @@ -31,6 +33,7 @@ def __init__(self, *args, **kwargs): self.container = None self.ior_timeout = None self.ppn = None + self.dfuse = None def setUp(self): """Set up each test case.""" @@ -47,6 +50,10 @@ def setUp(self): self.subprocess = self.params.get("subprocess", '/run/ior/*', False) self.ior_timeout = self.params.get("ior_timeout", '/run/ior/*', None) + # Use the local host as a client for hostfile/dfuse if the client list is empty + if not self.hostlist_clients: + self.hostlist_clients = get_local_host() + def create_pool(self): """Create a TestPool object to use with ior.""" # Get the pool params and create a pool @@ -118,7 +125,7 @@ def run_ior_with_pool(self, intercept=None, display_space=True, test_file_suffix # start dfuse if api is POSIX or HDF5 with vol connector if (self.ior_cmd.api.value == "POSIX" or plugin_path) and not self.dfuse: # Initialize dfuse instance - self.load_dfuse(self.hostlist_clients) + self.dfuse = get_dfuse(self, self.hostlist_clients) # Default mount_dir to value in dfuse instance mount_dir = mount_dir or self.dfuse.mount_dir.value # Add a substring in case of HDF5-VOL @@ -126,7 +133,7 @@ def run_ior_with_pool(self, intercept=None, display_space=True, test_file_suffix sub_dir = get_random_string(5) mount_dir = os.path.join(mount_dir, sub_dir) # Connect to the pool, create container and then start dfuse - self.start_dfuse(self.hostlist_clients, self.pool, self.container, mount_dir=mount_dir) + start_dfuse(self, self.dfuse, self.pool, self.container, mount_dir=mount_dir) # setup test file for POSIX or HDF5 with vol connector if self.ior_cmd.api.value == "POSIX" or plugin_path: @@ -144,8 +151,9 @@ def run_ior_with_pool(self, intercept=None, display_space=True, test_file_suffix fail_on_warning=fail_on_warning, out_queue=out_queue, env=env) finally: - if stop_dfuse: - self.stop_dfuse() + if stop_dfuse and self.dfuse: + self.dfuse.stop() + self.dfuse = None return out diff --git a/src/tests/ftest/util/mdtest_test_base.py b/src/tests/ftest/util/mdtest_test_base.py index b17cd3d90d7..10d92ac4636 100644 --- a/src/tests/ftest/util/mdtest_test_base.py +++ b/src/tests/ftest/util/mdtest_test_base.py @@ -6,13 +6,14 @@ import os -from dfuse_test_base import DfuseTestBase +from apricot import TestWithServers +from dfuse_utils import get_dfuse, start_dfuse from exception_utils import CommandFailure from job_manager_utils import get_job_manager from mdtest_utils import MdtestCommand -class MdtestBase(DfuseTestBase): +class MdtestBase(TestWithServers): """Base mdtest class. :avocado: recursive @@ -27,6 +28,13 @@ def __init__(self, *args, **kwargs): self.hostfile_clients_slots = None self.subprocess = False + # We should not be using these as class level variables, but are needed until the + # execute_mdtest() method can be redesigned to pass in these arguments instead of + # optionally defining them + self.pool = None + self.container = None + self.dfuse = None + def setUp(self): """Set up each test case.""" # obtain separate logs @@ -83,7 +91,8 @@ def execute_mdtest(self, out_queue=None, display_space=True): # start dfuse if api is POSIX if self.mdtest_cmd.api.value == "POSIX": - self.start_dfuse(self.hostlist_clients, self.pool, self.container) + self.dfuse = get_dfuse(self, self.hostlist_clients) + start_dfuse(self, self.dfuse, self.pool, self.container) self.mdtest_cmd.test_dir.update(self.dfuse.mount_dir.value) # Run Mdtest @@ -99,7 +108,10 @@ def execute_mdtest(self, out_queue=None, display_space=True): self.container.skip_cleanup() # Need to set self.container to None to force a creation of a new container self.container = None - self.stop_dfuse() + + if self.dfuse is not None: + self.dfuse.stop() + self.dfuse = None return out diff --git a/src/tests/ftest/util/performance_test_base.py b/src/tests/ftest/util/performance_test_base.py index 4fdb8ae06d4..3196242bed1 100644 --- a/src/tests/ftest/util/performance_test_base.py +++ b/src/tests/ftest/util/performance_test_base.py @@ -342,7 +342,9 @@ def run_performance_ior(self, namespace=None, use_intercept=True): self._run_performance_ior_single(intercept) # Manually stop dfuse after ior write completes - self.stop_dfuse() + if self.dfuse: + self.dfuse.stop() + self.dfuse = None # Wait between write and read self.phase_barrier() @@ -352,7 +354,9 @@ def run_performance_ior(self, namespace=None, use_intercept=True): self._run_performance_ior_single(intercept) # Manually stop dfuse after ior read completes - self.stop_dfuse() + if self.dfuse: + self.dfuse.stop() + self.dfuse = None self._log_daos_metrics() @@ -438,6 +442,8 @@ def run_performance_mdtest(self, namespace=None): self.verify_system_status(self.pool, self.container) # Manually stop dfuse after mdtest completes - self.stop_dfuse() + if self.dfuse: + self.dfuse.stop() + self.dfuse = None self._log_daos_metrics() diff --git a/src/tests/ftest/util/vol_test_base.py b/src/tests/ftest/util/vol_test_base.py index b45bcf62082..ad9f7b2ca51 100644 --- a/src/tests/ftest/util/vol_test_base.py +++ b/src/tests/ftest/util/vol_test_base.py @@ -1,16 +1,17 @@ """ -(C) Copyright 2020-2023 Intel Corporation. +(C) Copyright 2020-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent """ +from apricot import TestWithServers from command_utils import ExecutableCommand from command_utils_base import EnvironmentVariables -from dfuse_test_base import DfuseTestBase +from dfuse_utils import get_dfuse, start_dfuse from exception_utils import CommandFailure -class VolTestBase(DfuseTestBase): +class VolTestBase(TestWithServers): """Runs HDF5 vol test-suites. :avocado: recursive @@ -29,14 +30,17 @@ def run_test(self, job_manager, plugin_path, test_repo): client_processes = self.params.get("client_processes") # create pool, container and dfuse mount - self.add_pool(connect=False) - self.add_container(self.pool) + self.log_step('Creating a single pool and container') + pool = self.get_pool(connect=False) + container = self.get_container(pool) # VOL needs to run from a file system that supports xattr. # Currently nfs does not have this attribute so it was recommended # to create a dfuse dir and run vol tests from there. # create dfuse container - self.start_dfuse(self.hostlist_clients, self.pool, self.container) + self.log_step('Starting dfuse so VOL can run from a file system that supports xattr') + dfuse = get_dfuse(self, self.hostlist_clients) + start_dfuse(self, dfuse, pool, container) # Assign the test to run job_manager.job = ExecutableCommand( @@ -44,17 +48,19 @@ def run_test(self, job_manager, plugin_path, test_repo): check_results=["FAILED", "stderr"]) env = EnvironmentVariables() - env["DAOS_POOL"] = "{}".format(self.pool.uuid) - env["DAOS_CONT"] = "{}".format(self.container.uuid) + env["DAOS_POOL"] = "{}".format(pool.uuid) + env["DAOS_CONT"] = "{}".format(container.uuid) env["HDF5_VOL_CONNECTOR"] = "daos" env["HDF5_PLUGIN_PATH"] = "{}".format(plugin_path) job_manager.assign_hosts(self.hostlist_clients) job_manager.assign_processes(client_processes) job_manager.assign_environment(env, True) - job_manager.working_dir.value = self.dfuse.mount_dir.value + job_manager.working_dir.value = dfuse.mount_dir.value # run VOL Command + self.log_step(f'Running {job_manager.job.command}') try: job_manager.run() - except CommandFailure as _error: - self.fail("{} FAILED> \nException occurred: {}".format(job_manager.job, str(_error))) + except CommandFailure as error: + self.log.error(str(error)) + self.fail(f"{job_manager.job.command} failed")