diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 3c7b89fb..84f0b48c 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -1,6 +1,18 @@ Release Notes ============= +v1.6.0 +------ + +* Add warning about calling ``open()`` or ``copy()`` with filesystem paths that + are ambiguous. In the next version of stor, this will be an error, to prevent + differences between local and OBS code. Set + ``stor:raise_on_ambiguous_filesystem_path`` to get this behavior now. +* ``open()`` now raises with an invalid OBS path, rather than generating a + potentially harmful object. +* Add ``stor.test.TestCase`` that allows mocking out of both swift AND s3 + functionality in a test case. + v1.5.4 ------ diff --git a/stor/base.py b/stor/base.py index bae3dca5..46c7f478 100644 --- a/stor/base.py +++ b/stor/base.py @@ -267,6 +267,8 @@ def open(self, *args, **kwargs): Args: mode (str): first positional arg, mode of file descriptor encoding (str): text encoding to use (Python 3 only) + Raises: + ValueError: if ambiguous path - see ``stor.utils.validate_file_path`` for more. """ raise NotImplementedError @@ -370,14 +372,17 @@ class FileSystemPath(Path): """ def open(self, *args, **kwargs): """ - Opens a path and retains interface compatibility with - `SwiftPath` by popping the unused ``swift_upload_args`` keyword - argument. + Opens a path. Creates parent directory if it does not exist. + + Raises: + ValueError: if ambiguous path - see ``stor.utils.validate_file_path`` for more. """ - kwargs.pop('swift_upload_kwargs', None) + if kwargs.pop('swift_upload_kwargs', None): + warnings.warn('swift_upload_kwargs will be removed in stor 2.0') self.parent.makedirs_p() + utils.validate_file_path(self) return builtins.open(self, *args, **kwargs) def __enter__(self): diff --git a/stor/default.cfg b/stor/default.cfg index 82ee1afd..3211d10c 100644 --- a/stor/default.cfg +++ b/stor/default.cfg @@ -1,5 +1,8 @@ [stor] +# set to raise if file paths are ambiguous - see stor.utils.validate_file_path for more. +raise_on_ambiguous_filesystem_path = False + [s3] # See boto3 docs for more detail on these parameters - all passed directly to boto3.session.Session *if* set diff --git a/stor/obs.py b/stor/obs.py index 5d79bb13..92a3d928 100644 --- a/stor/obs.py +++ b/stor/obs.py @@ -1,6 +1,7 @@ import locale import posixpath import sys +import warnings from cached_property import cached_property import six @@ -90,6 +91,7 @@ def is_ambiguous(self): """Returns true if it cannot be determined if the path is a file or directory """ + warnings.warn('this function is deprecated and will be removed in stor 2.0') return not self.endswith('/') and not self.ext def _get_parts(self): @@ -282,11 +284,14 @@ def __init__(self, pth, mode='r', encoding=None, **kwargs): use binary mode OR explicitly set an encoding when reading/writing text (because writers from different computers may store data on OBS in different ways). Python 3 only. + Raises: + ValueError: if the mode is not valid or the path is ambiguous (see ``stor.copy``) """ if mode not in self._VALID_MODES: raise ValueError('invalid mode for file: %r' % mode) if six.PY2 and encoding: # pragma: no cover raise TypeError('encoding not supported in Python 2') + utils.validate_file_path(pth) self._path = pth self.mode = mode self._kwargs = kwargs diff --git a/stor/swift.py b/stor/swift.py index 51c4aacd..a37b34b2 100644 --- a/stor/swift.py +++ b/stor/swift.py @@ -688,11 +688,7 @@ def open(self, mode='r', encoding=None, swift_upload_options=None): ("r" or "rb") and writing ("w", "wb") encoding (str): text encoding to use. Defaults to ``locale.getpreferredencoding(False)`` (Python 3 only) - swift_upload_options (dict): DEPRECATED (use `stor.settings.use()` - instead). A dictionary of arguments that will be - passed as keyword args to `SwiftPath.upload` if any writes - occur on the opened resource. - + swift_upload_options (dict): DEPRECATED (use `stor.settings.use()` instead). Returns: SwiftFile: The swift object. @@ -700,6 +696,8 @@ def open(self, mode='r', encoding=None, swift_upload_options=None): SwiftError: A swift client error occurred. """ swift_upload_options = swift_upload_options or {} + if swift_upload_options: + warnings.warn('swift_upload_options will be removed in stor 2.0') return SwiftFile(self, mode=mode, encoding=encoding, **swift_upload_options) @_swift_retry(exceptions=(ConditionNotMetError, UnavailableError)) diff --git a/stor/test.py b/stor/test.py index b5c31dd5..3f8b5b0d 100644 --- a/stor/test.py +++ b/stor/test.py @@ -207,3 +207,7 @@ def setUp(self): del s3._thread_local.s3_transfer_config except AttributeError: pass + + +class StorTestCase(S3TestCase, SwiftTestCase): + """A TestCase class that mocks BOTH s3 and stor for tests""" diff --git a/stor/tests/test_cli.py b/stor/tests/test_cli.py index 9228f7c7..891ed5f5 100644 --- a/stor/tests/test_cli.py +++ b/stor/tests/test_cli.py @@ -40,7 +40,9 @@ def test_cli_error(self, mock_list, mock_stderr): @mock.patch('stor.settings.USER_CONFIG_FILE', '') def test_cli_config(self, mock_copytree): expected_settings = { - 'stor': {}, + 'stor': { + 'raise_on_ambiguous_filesystem_path': False + }, 's3': { 'aws_access_key_id': '', 'aws_secret_access_key': '', @@ -444,14 +446,14 @@ def test_walkfiles_no_pattern(self, mock_walkfiles): './a/b.txt', './c.txt', './d.txt', - './file' + './file.ext' ] self.parse_args('stor walkfiles .') self.assertEquals(sys.stdout.getvalue(), './a/b.txt\n' './c.txt\n' './d.txt\n' - './file\n') + './file.ext\n') mock_walkfiles.assert_called_once_with(PosixPath('.')) @@ -459,16 +461,16 @@ class TestCat(BaseCliTest): @mock.patch.object(S3Path, 'read_object', autospec=True) def test_cat_s3(self, mock_read): mock_read.return_value = b'hello world\n' - self.parse_args('stor cat s3://test/file') + self.parse_args('stor cat s3://test/file.ext') self.assertEquals(sys.stdout.getvalue(), 'hello world\n') - mock_read.assert_called_once_with(S3Path('s3://test/file')) + mock_read.assert_called_once_with(S3Path('s3://test/file.ext')) @mock.patch.object(SwiftPath, 'read_object', autospec=True) def test_cat_swift(self, mock_read): mock_read.return_value = b'hello world' - self.parse_args('stor cat swift://some/test/file') + self.parse_args('stor cat swift://some/test/file.ext') self.assertEquals(sys.stdout.getvalue(), 'hello world\n') - mock_read.assert_called_once_with(SwiftPath('swift://some/test/file')) + mock_read.assert_called_once_with(SwiftPath('swift://some/test/file.ext')) class TestCd(BaseCliTest): @@ -507,14 +509,14 @@ def test_cd_swift(self, mock_isdir): @mock.patch('sys.stderr', new=six.StringIO()) def test_cd_not_dir_s3(self, mock_isdir): with self.assertRaisesRegexp(SystemExit, '1'): - self.parse_args('stor cd s3://test/file') + self.parse_args('stor cd s3://test/file.ext') self.assertIn('not a directory', sys.stderr.getvalue()) @mock.patch.object(SwiftPath, 'isdir', return_value=False, autospec=True) @mock.patch('sys.stderr', new=six.StringIO()) def test_cd_not_dir_swift(self, mock_isdir): with self.assertRaisesRegexp(SystemExit, '1'): - self.parse_args('stor cd swift://test/container/file') + self.parse_args('stor cd swift://test/container/file.ext') self.assertIn('not a directory', sys.stderr.getvalue()) @mock.patch('sys.stderr', new=six.StringIO()) diff --git a/stor/tests/test_posix.py b/stor/tests/test_posix.py index fca2a28c..d7b4033c 100644 --- a/stor/tests/test_posix.py +++ b/stor/tests/test_posix.py @@ -92,25 +92,6 @@ def test_posix_file_destination(self): self.assertTrue(dest.exists()) self.assertEquals(dest.open().read(), '1') - def test_ambigious_swift_resource_destination(self): - with stor.NamedTemporaryDirectory() as tmp_d: - source = tmp_d / '1' - with open(source, 'w') as tmp_file: - tmp_file.write('1') - - dest = 'swift://tenant/container/ambiguous-resource' - with self.assertRaisesRegexp(ValueError, 'OBS destination'): - utils.copy(source, dest) - - def test_ambigious_swift_container_destination(self): - with stor.NamedTemporaryDirectory() as tmp_d: - source = tmp_d / '1' - with open(source, 'w') as tmp_file: - tmp_file.write('1') - - dest = 'swift://tenant/ambiguous-container' - with self.assertRaisesRegexp(ValueError, 'OBS destination'): - utils.copy(source, dest) def test_tenant_swift_destination(self): with stor.NamedTemporaryDirectory() as tmp_d: @@ -119,7 +100,7 @@ def test_tenant_swift_destination(self): with open(source / '1', 'w') as tmp_file: tmp_file.write('1') - dest = 'swift://tenant/' + dest = 'swift://tenant.with.dots' with self.assertRaisesRegexp(ValueError, 'copy to tenant'): utils.copy(source / '1', dest) diff --git a/stor/tests/test_s3.py b/stor/tests/test_s3.py index 59cf303c..d28c1642 100644 --- a/stor/tests/test_s3.py +++ b/stor/tests/test_s3.py @@ -101,16 +101,16 @@ def test_resource_none_w_bucket(self): self.assertIsNone(s3_p.resource) def test_resource_object(self): - s3_p = S3Path('s3://bucket/obj') - self.assertEquals(s3_p.resource, 'obj') + s3_p = S3Path('s3://bucket/obj.ext') + self.assertEquals(s3_p.resource, 'obj.ext') def test_resource_single_dir(self): s3_p = S3Path('s3://bucket/dir/') self.assertEquals(s3_p.resource, 'dir/') def test_resource_nested_obj(self): - s3_p = S3Path('s3://bucket/nested/obj') - self.assertEquals(s3_p.resource, 'nested/obj') + s3_p = S3Path('s3://bucket/nested/obj.ext') + self.assertEquals(s3_p.resource, 'nested/obj.ext') def test_resource_nested_dir(self): s3_p = S3Path('s3://bucket/nested/dir/') @@ -348,14 +348,14 @@ def test_list_condition(self): @mock.patch('botocore.response.StreamingBody', autospec=True) def test_list_w_use_manifest(self, mock_stream): - mock_stream.read.return_value = b'my/obj1\nmy/obj2\nmy/obj3\n' + mock_stream.read.return_value = b'my/obj1.ext\nmy/obj2.ext\nmy/obj3.ext\n' self.mock_s3.get_object.return_value = {'Body': mock_stream} mock_list = self.mock_s3_iterator mock_list.__iter__.return_value = [{ 'Contents': [ - {'Key': 'my/obj1'}, - {'Key': 'my/obj2'}, - {'Key': 'my/obj3'} + {'Key': 'my/obj1.ext'}, + {'Key': 'my/obj2.ext'}, + {'Key': 'my/obj3.ext'} ], 'IsTruncated': False }] @@ -363,20 +363,20 @@ def test_list_w_use_manifest(self, mock_stream): s3_p = S3Path('s3://bucket') results = s3_p.list(use_manifest=True) self.assertEquals(set(results), set([ - 's3://bucket/my/obj1', - 's3://bucket/my/obj2', - 's3://bucket/my/obj3' + 's3://bucket/my/obj1.ext', + 's3://bucket/my/obj2.ext', + 's3://bucket/my/obj3.ext' ])) @mock.patch('botocore.response.StreamingBody', autospec=True) def test_list_w_use_manifest_validation_err(self, mock_stream): - mock_stream.read.return_value = b'my/obj1\nmy/obj2\nmy/obj3\n' + mock_stream.read.return_value = b'my/obj1.ext\nmy/obj2.ext\nmy/obj3.ext\n' self.mock_s3.get_object.return_value = {'Body': mock_stream} mock_list = self.mock_s3_iterator mock_list.__iter__.return_value = [{ 'Contents': [ - {'Key': 'my/obj1'}, - {'Key': 'my/obj2'} + {'Key': 'my/obj1.ext'}, + {'Key': 'my/obj2.ext'} ], 'IsTruncated': False }] @@ -387,14 +387,14 @@ def test_list_w_use_manifest_validation_err(self, mock_stream): @mock.patch('botocore.response.StreamingBody', autospec=True) def test_list_w_condition_and_use_manifest(self, mock_stream): - mock_stream.read.return_value = b'my/obj1\nmy/obj2\nmy/obj3\n' + mock_stream.read.return_value = b'my/obj1.ext\nmy/obj2.ext\nmy/obj3.ext\n' self.mock_s3.get_object.return_value = {'Body': mock_stream} mock_list = self.mock_s3_iterator mock_list.__iter__.return_value = [{ 'Contents': [ - {'Key': 'my/obj1'}, - {'Key': 'my/obj2'}, - {'Key': 'my/obj3'} + {'Key': 'my/obj1.ext'}, + {'Key': 'my/obj2.ext'}, + {'Key': 'my/obj3.ext'} ], 'IsTruncated': False }] @@ -402,9 +402,9 @@ def test_list_w_condition_and_use_manifest(self, mock_stream): s3_p = S3Path('s3://bucket') results = s3_p.list(use_manifest=True, condition=lambda results: len(results) == 3) self.assertEquals(set(results), set([ - 's3://bucket/my/obj1', - 's3://bucket/my/obj2', - 's3://bucket/my/obj3' + 's3://bucket/my/obj1.ext', + 's3://bucket/my/obj2.ext', + 's3://bucket/my/obj3.ext' ])) def test_list_ignore_dir_markers(self): @@ -641,7 +641,7 @@ def test_getsize_file(self): 'ContentLength': 15, 'ContentType': 'text/plain' } - s3_p = S3Path('s3://bucket/obj') + s3_p = S3Path('s3://bucket/obj.ext') self.assertEquals(s3_p.getsize(), 15) @mock.patch.object(S3Path, 'exists', autospec=True) @@ -834,17 +834,17 @@ def test_rmtree_dir(self, mock_list): def test_rmtree_over_1000(self, mock_list): mock_delete_objects = self.mock_s3.delete_objects mock_delete_objects.return_value = {} - objects = [S3Path('s3://bucket/obj' + str(i)) for i in range(1234)] + objects = [S3Path('s3://bucket/obj.ext' + str(i)) for i in range(1234)] mock_list.return_value = objects s3_p = S3Path('s3://bucket') s3_p.rmtree() mock_delete_objects.assert_has_calls([ mock.call(Bucket='bucket', Delete={ - 'Objects': [{'Key': 'obj' + str(i)} for i in range(1000)] + 'Objects': [{'Key': 'obj.ext' + str(i)} for i in range(1000)] }), mock.call(Bucket='bucket', Delete={ - 'Objects': [{'Key': 'obj' + str(i + 1000)} for i in range(234)] + 'Objects': [{'Key': 'obj.ext' + str(i + 1000)} for i in range(234)] }) ]) self.assertEquals([], objects) @@ -1228,12 +1228,12 @@ def test_download_w_condition(self, mock_list, mock_getsize, mock_make_dest): @mock.patch('botocore.response.StreamingBody', autospec=True) def test_download_w_use_manifest(self, mock_stream, mock_list, mock_getsize, mock_make_dest_dir): - mock_stream.read.return_value = b'my/obj1\nmy/obj2\nmy/obj3\n' + mock_stream.read.return_value = b'my/obj1.ext\nmy/obj2.ext\nmy/obj3.ext\n' self.mock_s3.get_object.return_value = {'Body': mock_stream} mock_list.return_value = [ - S3Path('s3://bucket/my/obj1'), - S3Path('s3://bucket/my/obj2'), - S3Path('s3://bucket/my/obj3') + S3Path('s3://bucket/my/obj1.ext'), + S3Path('s3://bucket/my/obj2.ext'), + S3Path('s3://bucket/my/obj3.ext') ] s3_p = S3Path('s3://bucket') @@ -1244,11 +1244,11 @@ def test_download_w_use_manifest(self, mock_stream, mock_list, mock_getsize, @mock.patch('botocore.response.StreamingBody', autospec=True) def test_download_w_use_manifest_validation_err(self, mock_stream, mock_list, mock_getsize, mock_make_dest_dir): - mock_stream.read.return_value = b'my/obj1\nmy/obj2\nmy/obj3\n' + mock_stream.read.return_value = b'my/obj1.ext\nmy/obj2.ext\nmy/obj3.ext\n' self.mock_s3.get_object.return_value = {'Body': mock_stream} mock_list.return_value = [ - S3Path('s3://bucket/my/obj1'), - S3Path('s3://bucket/my/obj2') + S3Path('s3://bucket/my/obj1.ext'), + S3Path('s3://bucket/my/obj2.ext') ] s3_p = S3Path('s3://bucket') @@ -1259,12 +1259,12 @@ def test_download_w_use_manifest_validation_err(self, mock_stream, mock_list, mo @mock.patch('botocore.response.StreamingBody', autospec=True) def test_download_w_condition_and_use_manifest(self, mock_stream, mock_list, mock_getsize, mock_make_dest_dir): - mock_stream.read.return_value = b'my/obj1\nmy/obj2\nmy/obj3\n' + mock_stream.read.return_value = b'my/obj1.ext\nmy/obj2.ext\nmy/obj3.ext\n' self.mock_s3.get_object.return_value = {'Body': mock_stream} mock_list.return_value = [ - S3Path('s3://bucket/my/obj1'), - S3Path('s3://bucket/my/obj2'), - S3Path('s3://bucket/my/obj3') + S3Path('s3://bucket/my/obj1.ext'), + S3Path('s3://bucket/my/obj2.ext'), + S3Path('s3://bucket/my/obj3.ext') ] s3_p = S3Path('s3://bucket') @@ -1290,9 +1290,9 @@ def test_download_object_threads(self, mock_pool, mock_list, mock_getsize, @mock.patch.object(S3Path, 'list', autospec=True) def test_download_remote_error(self, mock_list, mock_getsize, mock_make_dest_dir): mock_list.return_value = [ - S3Path('s3://bucket/my/obj1'), - S3Path('s3://bucket/my/obj2'), - S3Path('s3://bucket/my/obj3') + S3Path('s3://bucket/my/obj1.ext'), + S3Path('s3://bucket/my/obj2.ext'), + S3Path('s3://bucket/my/obj3.ext') ] self.mock_s3_transfer.download_file.side_effect = RetriesExceededError('failed') @@ -1302,9 +1302,9 @@ def test_download_remote_error(self, mock_list, mock_getsize, mock_make_dest_dir @mock.patch.object(S3Path, 'list', autospec=True) def test_download_other_error(self, mock_list, mock_getsize, mock_make_dest_dir): mock_list.return_value = [ - S3Path('s3://bucket/my/obj1'), - S3Path('s3://bucket/my/obj2'), - S3Path('s3://bucket/my/obj3') + S3Path('s3://bucket/my/obj1.ext'), + S3Path('s3://bucket/my/obj2.ext'), + S3Path('s3://bucket/my/obj3.ext') ] self.mock_s3_transfer.download_file.side_effect = [None, ValueError] @@ -1314,9 +1314,9 @@ def test_download_other_error(self, mock_list, mock_getsize, mock_make_dest_dir) @mock.patch.object(S3Path, 'list', autospec=True) def test_download_multipart_settings(self, mock_list, mock_getsize, mock_make_dest_dir): mock_list.return_value = [ - S3Path('s3://bucket/my/obj1'), - S3Path('s3://bucket/my/obj2'), - S3Path('s3://bucket/my/obj3') + S3Path('s3://bucket/my/obj1.ext'), + S3Path('s3://bucket/my/obj2.ext'), + S3Path('s3://bucket/my/obj3.ext') ] s3_p = S3Path('s3://bucket') with settings.use({'s3:download': {'segment_size': '5M', 'segment_threads': 20}}): @@ -1360,14 +1360,14 @@ def test_copy_posix_dir_destination(self, mockdownload_object): mockdownload_object.assert_called_once_with(p, Path(tmp_d) / 'file_source.txt') def test_copy_swift_destination(self): - p = S3Path('s3://bucket/key/file_source') + p = S3Path('s3://bucket/key/file_source.ext') with self.assertRaisesRegexp(ValueError, 'OBS path'): - p.copy('swift://tenant/container/file_dest') + p.copy('swift://tenant/container/file_dest.ext') def test_copy_s3_destination(self): - p = S3Path('s3://bucket/key/file_source') + p = S3Path('s3://bucket/key/file_source.ext') with self.assertRaisesRegexp(ValueError, 'OBS path'): - p.copy('s3://bucket/key/file_dest') + p.copy('s3://bucket/key/file_dest.ext') class TestCopytree(S3TestCase): @@ -1395,7 +1395,7 @@ def test_copytree_windows_destination(self): class TestS3File(S3TestCase): def test_invalid_buffer_mode(self): - s3_f = S3Path('s3://bucket/key/obj').open() + s3_f = S3Path('s3://bucket/key/obj.ext').open() s3_f.mode = 'invalid' with self.assertRaisesRegexp(ValueError, 'buffer'): s3_f._buffer @@ -1404,13 +1404,13 @@ def test_invalid_buffer_mode(self): def test_invalid_flush_mode(self, mock_stream): mock_stream.read.return_value = b'data' self.mock_s3.get_object.return_value = {'Body': mock_stream} - s3_p = S3Path('s3://bucket/key/obj') + s3_p = S3Path('s3://bucket/key/obj.ext') obj = s3_p.open() with self.assertRaisesRegexp(TypeError, 'flush'): obj.flush() def test_name(self): - s3_p = S3Path('s3://bucket/key/obj') + s3_p = S3Path('s3://bucket/key/obj.ext') obj = s3_p.open() self.assertEquals(obj.name, s3_p) @@ -1418,7 +1418,7 @@ def test_name(self): def test_context_manager_on_closed_file(self, mock_stream): mock_stream.read.return_value = b'data' self.mock_s3.get_object.return_value = {'Body': mock_stream} - s3_p = S3Path('s3://bucket/key/obj') + s3_p = S3Path('s3://bucket/key/obj.ext') obj = s3_p.open() obj.close() @@ -1427,7 +1427,7 @@ def test_context_manager_on_closed_file(self, mock_stream): pass # pragma: no cover def test_invalid_mode(self): - s3_p = S3Path('s3://bucket/key/obj') + s3_p = S3Path('s3://bucket/key/obj.ext') with self.assertRaisesRegexp(ValueError, 'invalid mode'): s3_p.open(mode='invalid') @@ -1443,7 +1443,7 @@ class MyFile(object): def test_read_on_closed_file(self, mock_stream): mock_stream.read.return_value = b'data' self.mock_s3.get_object.return_value = {'Body': mock_stream} - s3_p = S3Path('s3://bucket/key/obj') + s3_p = S3Path('s3://bucket/key/obj.ext') obj = s3_p.open() obj.close() @@ -1451,7 +1451,7 @@ def test_read_on_closed_file(self, mock_stream): obj.read() def test_read_invalid_mode(self): - s3_p = S3Path('s3://bucket/key/obj') + s3_p = S3Path('s3://bucket/key/obj.ext') with self.assertRaisesRegexp(TypeError, 'mode.*read'): s3_p.open(mode='wb').read() @@ -1460,7 +1460,7 @@ def test_read_success(self, mock_stream): mock_stream.read.return_value = b'data' self.mock_s3.get_object.return_value = {'Body': mock_stream} - s3_p = S3Path('s3://bucket/key/obj') + s3_p = S3Path('s3://bucket/key/obj.ext') self.assertEquals(s3_p.open().read(), 'data') @mock.patch('botocore.response.StreamingBody', autospec=True) @@ -1474,7 +1474,7 @@ def test_iterating_over_files(self, mock_stream): mock_stream.read.return_value = data self.mock_s3.get_object.return_value = {'Body': mock_stream} - s3_p = S3Path('s3://bucket/key/obj') + s3_p = S3Path('s3://bucket/key/obj.ext') self.assertEquals(s3_p.open().read(), data.decode('ascii')) self.assertEquals(s3_p.open().readlines(), [l + '\n' for l in data.decode('ascii').split('\n')][:-1]) @@ -1485,7 +1485,7 @@ def test_iterating_over_files(self, mock_stream): self.assertEqual(next(iter(s3_p.open())), 'line1\n') def test_write_invalid_args(self): - s3_p = S3Path('s3://bucket/key/obj') + s3_p = S3Path('s3://bucket/key/obj.ext') obj = s3_p.open(mode='r') with self.assertRaisesRegexp(TypeError, 'mode.*write'): obj.write('hello') @@ -1493,7 +1493,7 @@ def test_write_invalid_args(self): @mock.patch('time.sleep', autospec=True) def test_binary_write_multiple_w_context_manager(self, mock_sleep): mock_upload = self.mock_s3_transfer.upload_file - s3_p = S3Path('s3://bucket/key/obj') + s3_p = S3Path('s3://bucket/key/obj.ext') with s3_p.open(mode='wb') as obj: obj.write(b'hello') obj.write(b' world') @@ -1504,7 +1504,7 @@ def test_binary_write_multiple_w_context_manager(self, mock_sleep): @mock.patch('time.sleep', autospec=True) def test_write_multiple_flush_multiple_upload(self, mock_sleep): mock_upload = self.mock_s3_transfer.upload_file - s3_p = S3Path('s3://bucket/key/obj') + s3_p = S3Path('s3://bucket/key/obj.ext') with NamedTemporaryFile('wb', delete=False) as ntf1,\ NamedTemporaryFile('wb', delete=False) as ntf2,\ NamedTemporaryFile('wb', delete=False) as ntf3: @@ -1534,7 +1534,7 @@ def test_write_multiple_flush_multiple_upload(self, mock_sleep): @mock.patch('time.sleep', autospec=True) def test_close_no_writes(self, mock_sleep): mock_upload = self.mock_s3_transfer.upload_file - s3_p = S3Path('s3://bucket/key/obj') + s3_p = S3Path('s3://bucket/key/obj.ext') obj = s3_p.open(mode='wb') obj.close() diff --git a/stor/tests/test_settings.py b/stor/tests/test_settings.py index 74f57e6e..df32f262 100644 --- a/stor/tests/test_settings.py +++ b/stor/tests/test_settings.py @@ -32,7 +32,9 @@ class TestSettings(unittest.TestCase): @mock.patch.dict(os.environ, {}, clear=True) def test_initialize_default(self): expected_settings = { - 'stor': {}, + 'stor': { + 'raise_on_ambiguous_filesystem_path': False, + }, 's3': { 'aws_access_key_id': '', 'aws_secret_access_key': '', @@ -83,7 +85,9 @@ def test_initialize_default(self): @mock.patch.dict(os.environ, {}, clear=True) def test_initialize_w_user_file(self): expected_settings = { - 'stor': {}, + 'stor': { + 'raise_on_ambiguous_filesystem_path': False, + }, 's3': { 'aws_access_key_id': '', 'aws_secret_access_key': '', diff --git a/stor/tests/test_swift.py b/stor/tests/test_swift.py index bfc38d10..11a9d440 100644 --- a/stor/tests/test_swift.py +++ b/stor/tests/test_swift.py @@ -87,7 +87,7 @@ def test_basename(self): class TestManifestUtilities(unittest.TestCase): def test_generate_save_and_read_manifest(self): with NamedTemporaryDirectory() as tmp_d: - manifest_contents = ['obj1', 'dir/obj2'] + manifest_contents = ['obj1', 'dir/obj2.ext'] utils.generate_and_save_data_manifest(tmp_d, manifest_contents) read_contents = utils.get_data_manifest_contents(tmp_d) @@ -162,8 +162,8 @@ def test_w_container_no_resource(self): self.assertIsNone(swift_p.resource) def test_resource_single_object(self): - swift_p = SwiftPath('swift://tenant/container/obj') - self.assertEquals(swift_p.resource, 'obj') + swift_p = SwiftPath('swift://tenant/container/obj.ext') + self.assertEquals(swift_p.resource, 'obj.ext') def test_resource_single_dir_w_slash(self): swift_p = SwiftPath('swift://tenant/container/dir/') @@ -233,29 +233,29 @@ def setUp(self): def test_makedirs_p_does_nothing(self): # dumb test... but why not? - SwiftPath('swift://tenant/container/obj').makedirs_p() + SwiftPath('swift://tenant/container/obj.ext').makedirs_p() def test_invalid_buffer_mode(self): - swift_f = SwiftPath('swift://tenant/container/obj').open() + swift_f = SwiftPath('swift://tenant/container/obj.ext').open() swift_f.mode = 'invalid' with self.assertRaisesRegexp(ValueError, 'buffer'): swift_f._buffer def test_invalid_flush_mode(self): self.mock_swift_conn.get_object.return_value = ('header', b'data') - swift_p = SwiftPath('swift://tenant/container/obj') + swift_p = SwiftPath('swift://tenant/container/obj.ext') obj = swift_p.open() with self.assertRaisesRegexp(TypeError, 'flush'): obj.flush() def test_name(self): - swift_p = SwiftPath('swift://tenant/container/obj') + swift_p = SwiftPath('swift://tenant/container/obj.ext') obj = swift_p.open() self.assertEquals(obj.name, swift_p) def test_context_manager_on_closed_file(self): self.mock_swift_conn.get_object.return_value = ('header', b'data') - swift_p = SwiftPath('swift://tenant/container/obj') + swift_p = SwiftPath('swift://tenant/container/obj.ext') obj = swift_p.open() obj.close() @@ -264,7 +264,7 @@ def test_context_manager_on_closed_file(self): pass # pragma: no cover def test_invalid_mode(self): - swift_p = SwiftPath('swift://tenant/container/obj') + swift_p = SwiftPath('swift://tenant/container/obj.ext') with self.assertRaisesRegexp(ValueError, 'invalid mode'): swift_p.open(mode='invalid') @@ -278,7 +278,7 @@ class MyFile(object): def test_read_on_closed_file(self): self.mock_swift_conn.get_object.return_value = ('header', b'data') - swift_p = SwiftPath('swift://tenant/container/obj') + swift_p = SwiftPath('swift://tenant/container/obj.ext') obj = swift_p.open() obj.close() @@ -286,14 +286,14 @@ def test_read_on_closed_file(self): obj.read() def test_read_invalid_mode(self): - swift_p = SwiftPath('swift://tenant/container/obj') + swift_p = SwiftPath('swift://tenant/container/obj.ext') with self.assertRaisesRegexp(TypeError, 'mode.*read'): swift_p.open(mode='wb').read() def test_read_success(self): self.mock_swift_conn.get_object.return_value = ('header', b'data') - swift_p = SwiftPath('swift://tenant/container/obj') + swift_p = SwiftPath('swift://tenant/container/obj.ext') self.assertEquals(swift_p.open().read(), 'data') def test_iterating_over_files(self): @@ -305,7 +305,7 @@ def test_iterating_over_files(self): ''' self.mock_swift_conn.get_object.return_value = ('header', data) - swift_p = SwiftPath('swift://tenant/container/obj') + swift_p = SwiftPath('swift://tenant/container/obj.ext') # open().read() should return str for r self.assertEquals(swift_p.open('r').read(), data.decode('ascii')) # open().read() should return bytes for rb @@ -324,13 +324,13 @@ def test_read_success_on_second_try(self, mock_sleep): ClientException('dummy', 'dummy', http_status=404), ('header', b'data') ] - swift_p = SwiftPath('swift://tenant/container/obj') + swift_p = SwiftPath('swift://tenant/container/obj.ext') obj = swift_p.open() self.assertEquals(obj.read(), 'data') self.assertEquals(len(mock_sleep.call_args_list), 1) def test_write_invalid_args(self): - swift_p = SwiftPath('swift://tenant/container/obj') + swift_p = SwiftPath('swift://tenant/container/obj.ext') obj = swift_p.open(mode='r') with self.assertRaisesRegexp(TypeError, 'mode.*write'): obj.write('hello') @@ -342,7 +342,7 @@ def test_write_use_manifest_multiple_and_close(self, mock_upload, mock_sleep): with mock.patch('tempfile.NamedTemporaryFile', autospec=True) as ntf_mock: ntf_mock.side_effect = [fp] - swift_p = SwiftPath('swift://tenant/container/obj') + swift_p = SwiftPath('swift://tenant/container/obj.ext') obj = swift_p.open(mode='wb', swift_upload_options={ 'use_manifest': True }) @@ -358,7 +358,7 @@ def test_write_use_manifest_multiple_and_close(self, mock_upload, mock_sleep): @mock.patch('time.sleep', autospec=True) @mock.patch.object(SwiftPath, 'upload', autospec=True) def test_write_multiple_w_context_manager(self, mock_upload, mock_sleep): - swift_p = SwiftPath('swift://tenant/container/obj') + swift_p = SwiftPath('swift://tenant/container/obj.ext') with swift_p.open(mode='wb') as obj: obj.write(b'hello') obj.write(b' world') @@ -368,7 +368,7 @@ def test_write_multiple_w_context_manager(self, mock_upload, mock_sleep): @mock.patch.object(SwiftPath, 'upload', autospec=True) def test_write_multiple_flush_multiple_upload(self, mock_upload, mock_sleep): - swift_p = SwiftPath('swift://tenant/container/obj') + swift_p = SwiftPath('swift://tenant/container/obj.ext') with NamedTemporaryFile(delete=False) as ntf1,\ NamedTemporaryFile(delete=False) as ntf2,\ NamedTemporaryFile(delete=False) as ntf3: @@ -395,7 +395,7 @@ def test_write_multiple_flush_multiple_upload(self, mock_upload, @mock.patch('time.sleep', autospec=True) @mock.patch.object(SwiftPath, 'upload', autospec=True) def test_close_no_writes(self, mock_upload, mock_sleep): - swift_p = SwiftPath('swift://tenant/container/obj') + swift_p = SwiftPath('swift://tenant/container/obj.ext') obj = swift_p.open(mode='wb') obj.close() @@ -484,7 +484,7 @@ def test_no_auth_url(self): } with settings.use(temp_settings): with self.assertRaisesRegexp(ValueError, 'auth url'): - SwiftPath('swift://tenant/container/obj').temp_url() + SwiftPath('swift://tenant/container/obj.ext').temp_url() def test_no_temp_url_key(self): temp_settings = { @@ -495,7 +495,7 @@ def test_no_temp_url_key(self): } with settings.use(temp_settings): with self.assertRaisesRegexp(ValueError, 'temporary url key'): - SwiftPath('swift://tenant/container/obj').temp_url() + SwiftPath('swift://tenant/container/obj.ext').temp_url() class TestList(SwiftTestCase): @@ -892,52 +892,52 @@ def test_list_starts_with_no_resource(self): @mock.patch('time.sleep', autospec=True) def test_list_w_condition_and_use_manifest(self, mock_sleep): - self.mock_swift_conn.get_object.return_value = ('header', b'my/obj1\nmy/obj2\nmy/obj3\n') + self.mock_swift_conn.get_object.return_value = ('header', b'my/obj1.ext\nmy/obj2.ext\nmy/obj3.ext\n') mock_list = self.mock_swift_conn.get_container mock_list.return_value = ({}, [{ - 'name': 'my/obj1' + 'name': 'my/obj1.ext' }, { - 'name': 'my/obj2' + 'name': 'my/obj2.ext' }, { - 'name': 'my/obj3' + 'name': 'my/obj3.ext' }]) swift_p = SwiftPath('swift://tenant/container/') results = swift_p.list(use_manifest=True, condition=lambda results: len(results) == 3) self.assertEquals(set(results), set([ - 'swift://tenant/container/my/obj1', - 'swift://tenant/container/my/obj2', - 'swift://tenant/container/my/obj3' + 'swift://tenant/container/my/obj1.ext', + 'swift://tenant/container/my/obj2.ext', + 'swift://tenant/container/my/obj3.ext' ])) @mock.patch('time.sleep', autospec=True) def test_list_use_manifest(self, mock_sleep): - self.mock_swift_conn.get_object.return_value = ('header', b'my/obj1\nmy/obj2\nmy/obj3\n') + self.mock_swift_conn.get_object.return_value = ('header', b'my/obj1.ext\nmy/obj2.ext\nmy/obj3.ext\n') mock_list = self.mock_swift_conn.get_container mock_list.return_value = ({}, [{ - 'name': 'my/obj1' + 'name': 'my/obj1.ext' }, { - 'name': 'my/obj2' + 'name': 'my/obj2.ext' }, { - 'name': 'my/obj3' + 'name': 'my/obj3.ext' }]) swift_p = SwiftPath('swift://tenant/container/') results = swift_p.list(use_manifest=True) self.assertEquals(set(results), set([ - 'swift://tenant/container/my/obj1', - 'swift://tenant/container/my/obj2', - 'swift://tenant/container/my/obj3' + 'swift://tenant/container/my/obj1.ext', + 'swift://tenant/container/my/obj2.ext', + 'swift://tenant/container/my/obj3.ext' ])) @mock.patch('time.sleep', autospec=True) def test_list_use_manifest_validation_err(self, mock_sleep): - self.mock_swift_conn.get_object.return_value = ('header', b'my/obj1\nmy/obj2\nmy/obj3\n') + self.mock_swift_conn.get_object.return_value = ('header', b'my/obj1.ext\nmy/obj2.ext\nmy/obj3.ext\n') mock_list = self.mock_swift_conn.get_container mock_list.return_value = ({}, [{ - 'name': 'my/obj1' + 'name': 'my/obj1.ext' }, { - 'name': 'my/obj2' + 'name': 'my/obj2.ext' }]) swift_p = SwiftPath('swift://tenant/container/') @@ -949,35 +949,35 @@ class TestWalkFiles(SwiftTestCase): def test_no_pattern_w_dir_markers(self): mock_list = self.mock_swift_conn.get_container mock_list.return_value = ({}, [{ - 'name': 'my/obj1', + 'name': 'my/obj1.ext', 'content_type': 'application/directory' }, { - 'name': 'my/obj2', + 'name': 'my/obj2.ext', 'content_type': 'text/directory' }, { - 'name': 'my/obj3', + 'name': 'my/obj3.ext', 'content_type': 'application/octet-stream' }, { - 'name': 'my/obj4', + 'name': 'my/obj4.ext', 'content_type': 'application/octet-stream' }]) f = list(SwiftPath('swift://tenant/container').walkfiles()) self.assertEquals(set(f), set([ - SwiftPath('swift://tenant/container/my/obj3'), - SwiftPath('swift://tenant/container/my/obj4') + SwiftPath('swift://tenant/container/my/obj3.ext'), + SwiftPath('swift://tenant/container/my/obj4.ext') ])) def test_w_pattern_w_dir_markers(self): mock_list = self.mock_swift_conn.get_container mock_list.return_value = ({}, [{ - 'name': 'my/obj1', + 'name': 'my/obj1.ext', 'content_type': 'application/directory' }, { - 'name': 'my/obj2', + 'name': 'my/obj2.ext', 'content_type': 'text/directory' }, { - 'name': 'my/obj3', + 'name': 'my/obj3.ext', 'content_type': 'application/octet-stream' }, { 'name': 'my/obj4.sh', @@ -986,7 +986,7 @@ def test_w_pattern_w_dir_markers(self): 'name': 'my/other/obj5.sh', 'content_type': 'application/octet-stream' }, { - 'name': 'my/dirwithpattern.sh/obj6', + 'name': 'my/dirwithpattern.sh/obj6.ext', 'content_type': 'application/octet-stream' }]) @@ -1413,18 +1413,18 @@ def test_download_correct_thread_options(self): @mock.patch('time.sleep', autospec=True) @mock.patch.object(SwiftPath, 'list', autospec=True) def test_download_w_condition_and_use_manifest(self, mock_list, mock_sleep): - self.mock_swift_conn.get_object.return_value = ('header', b'my/obj1\nmy/obj2\nmy/obj3\n') + self.mock_swift_conn.get_object.return_value = ('header', b'my/obj1.ext\nmy/obj2.ext\nmy/obj3.ext\n') self.mock_swift.download.return_value = [{ 'action': 'download_object', - 'object': 'my/obj1', + 'object': 'my/obj1.ext', 'success': True }, { 'action': 'download_object', - 'object': 'my/obj2', + 'object': 'my/obj2.ext', 'success': True }, { 'action': 'download_object', - 'object': 'my/obj3', + 'object': 'my/obj3.ext', 'success': True }] @@ -1448,18 +1448,18 @@ def test_download_w_condition_and_use_manifest(self, mock_list, mock_sleep): @mock.patch('time.sleep', autospec=True) @mock.patch.object(SwiftPath, 'list', autospec=True) def test_download_w_use_manifest(self, mock_list, mock_sleep): - self.mock_swift_conn.get_object.return_value = ('header', b'my/obj1\nmy/obj2\nmy/obj3\n') + self.mock_swift_conn.get_object.return_value = ('header', b'my/obj1.ext\nmy/obj2.ext\nmy/obj3.ext\n') self.mock_swift.download.return_value = [{ 'action': 'download_object', - 'object': 'my/obj1', + 'object': 'my/obj1.ext', 'success': True }, { 'action': 'download_object', - 'object': 'my/obj2', + 'object': 'my/obj2.ext', 'success': True }, { 'action': 'download_object', - 'object': 'my/obj3', + 'object': 'my/obj3.ext', 'success': True }] @@ -1481,14 +1481,16 @@ def test_download_w_use_manifest(self, mock_list, mock_sleep): @mock.patch('time.sleep', autospec=True) @mock.patch.object(SwiftPath, 'list', autospec=True) def test_download_w_use_manifest_validation_err(self, mock_list, mock_sleep): - self.mock_swift_conn.get_object.return_value = ('header', b'my/obj1\nmy/obj2\nmy/obj3\n') + self.mock_swift_conn.get_object.return_value = ( + 'header', + b'my/obj1.ext\nmy/obj2.ext\nmy/obj3.ext\n') self.mock_swift.download.return_value = [{ 'action': 'download_object', - 'object': 'my/obj1', + 'object': 'my/obj1.ext', 'success': True }, { 'action': 'download_object', - 'object': 'my/obj2', + 'object': 'my/obj2.ext', 'success': True }] @@ -2021,9 +2023,9 @@ def test_copy_posix_dir_destination(self, mockdownload_object): mockdownload_object.assert_called_once_with(p, Path(tmp_d) / 'file_source.txt') def test_copy_swift_destination(self): - p = SwiftPath('swift://tenant/container/file_source') + p = SwiftPath('swift://tenant/container/file_source.txt') with self.assertRaisesRegexp(ValueError, 'OBS path'): - p.copy('swift://tenant/container/file_dest') + p.copy('swift://tenant/container/file_dest.txt') class TestCopytree(SwiftTestCase): diff --git a/stor/tests/test_utils.py b/stor/tests/test_utils.py index 1d1b7766..7c70ae18 100644 --- a/stor/tests/test_utils.py +++ b/stor/tests/test_utils.py @@ -1,12 +1,14 @@ import errno import logging -import mock import ntpath import os import stat import unittest + from testfixtures import LogCapture +import mock +import pytest import stor from stor import Path @@ -15,6 +17,7 @@ from stor.swift import SwiftPath from stor.windows import WindowsPath from stor import utils +from stor.test import StorTestCase class TestBaseProgressLogger(unittest.TestCase): @@ -112,7 +115,7 @@ def test_w_broken_symlink(self): ) with utils.NamedTemporaryDirectory(dir=swift_dir) as tmp_dir: symlink = tmp_dir / 'broken.symlink' - symlink_source = tmp_dir / 'nonexistent' + symlink_source = tmp_dir / 'nonexistent.txt' # put something in symlink source so that Python doesn't complain with stor.open(symlink_source, 'w') as fp: fp.write('blah') @@ -460,3 +463,47 @@ def test_path_no_perms(self): self.mock_copy.side_effect = stor.exceptions.FailedUploadError('foo') self.assertFalse(utils.is_writeable('s3://stor-test/foo/bar')) self.assertFalse(self.mock_remove.called) + + +@mock.patch.object(stor.base.FileSystemPath, 'makedirs', lambda *args, **kwargs: None) +class TestAmbiguityValidation(StorTestCase): + # someday we'll be able to use pytest.parametrized, but not today + DRIVES = ["swift://", "s3://", "/"] + def test_open_detects_invalid_modes(self): + def tester(drive): + with self.assertRaisesRegexp(ValueError, 'mode'): + with stor.open(stor.join(drive, 'dummy/something/path.txt'), 'invalid'): + assert False, 'I should not be able to get into this block' + for drive in self.DRIVES: + tester(drive) + + @mock.patch('stor.base.builtins.open', lambda *args, **kwargs: None) + def test_open_should_detect_invalid_paths(self): + def tester(drive, subpath): + with self.assertRaisesRegexp(ValueError, 'file paths'): + with stor.open(stor.join(drive, subpath), 'w'): + assert False, 'I should not be able to get into this block' + with self.assertRaisesRegexp(ValueError, 'file paths'): + # prevent from going out of scope + f = stor.open(stor.join(drive, subpath)) + assert False and not f + with self.assertRaisesRegexp(ValueError, 'file paths'): + utils.copy('whatever', stor.join(drive, subpath)) + with stor.settings.use({'stor': {'raise_on_ambiguous_filesystem_path': True}}): + for drive in self.DRIVES: + for subpath in [ + 'tenant/ambiguous-container', 'T/C/trailing-slash/', 'T/C/no-extension']: + tester(drive, subpath) + with pytest.warns(UserWarning): + stor.open('/a/') + + @mock.patch('stor.base.builtins.open', lambda *args, **kwargs: None) + def test_validate_path(self): + def tester(subpath, regex): + with stor.settings.use({'stor': {'raise_on_ambiguous_filesystem_path': True}}): + for drive in ['swift://', 's3://', '/']: + with self.assertRaisesRegexp(ValueError, regex): + utils.validate_file_path(stor.join(drive, subpath)) + for subpath in ['T', 'T/C', 'T/C/no-extension']: + tester(subpath, 'without extension are ambiguous') + tester('T/C/something/', 'trailing slash') diff --git a/stor/utils.py b/stor/utils.py index a475e119..5d71c215 100644 --- a/stor/utils.py +++ b/stor/utils.py @@ -7,6 +7,7 @@ import shutil from subprocess import check_call import tempfile +import warnings from stor import exceptions @@ -312,6 +313,50 @@ def is_writeable(path, swift_retry_options=None): return answer +def validate_file_path(dest): + """Raises ValueError if dest is an ambiguous or invalid path. + + Args: + dest (Path): path to validate + Raises: + ValueError: if path is ambiguous and is not a filesystem path (see below) + Note: + if an ambiguous filesystem path and ``raise_on_ambiguous_filesystem_path`` is true, will raise + a ValueError as well, otherwise it warns. + + In OBS-land, we cannot differentiate between files and directories, except by convention. + Conventionally, directories are either prefixes or zero-sized objects ending with a trailing + slash. With POSIX copies, it's conventional to do something like:: + cp source/myfile.txt dest + + In which case the system checks whether dest is a directory and, if it is, dumps to + ``dest/myfile.txt`` or otherwise dumps to ``dest``. + + We can't do that kind of validation and don't want the outcome of a command to depend upon + whether there's a file with the same prefix or not. Thus, we raise an error if you try to use + `stor.copy()`. (to manipulate files without extensions, you can always use `stor.copytree()`. + """ + import stor.settings + + # we use a try/except so that it's easier for the reader to tell why the exception was + # generated. + try: + if not dest.name: + raise ValueError('file paths may not end with trailing slash') + if not dest.ext: + raise ValueError('file paths without extension are ambiguous') + except ValueError as e: + if (is_filesystem_path(dest) and + not stor.settings.get()['stor']['raise_on_ambiguous_filesystem_path']): + warnings.warn( + 'In stor 2.0, ambiguous filesystem paths will raise an error (%s)' % str(e), + # this works for stor.copy, will not look right when the copy method is used + stacklevel=4 + ) + return + raise + + def copy(source, dest, swift_retry_options=None): """Copies a source file to a destination file. @@ -347,18 +392,18 @@ def copy(source, dest, swift_retry_options=None): >>> local_p.copy('swift://tenant/container/dir') Traceback (most recent call last): ... - ValueError: OBS destination must be file with extension or directory with slash + ValueError: OBS file destinations without extension are ambiguous """ from stor import Path from stor.obs import OBSUploadObject source = Path(source) + validate_file_path(source) dest = Path(dest) + validate_file_path(dest) swift_retry_options = swift_retry_options or {} if is_obs_path(source) and is_obs_path(dest): raise ValueError('cannot copy one OBS path to another OBS path') - if is_obs_path(dest) and dest.is_ambiguous(): - raise ValueError('OBS destination must be file with extension or directory with slash') if is_filesystem_path(dest): dest.parent.makedirs_p()