From 9f6273123c16f6927b6f9712291a7075cbe9db58 Mon Sep 17 00:00:00 2001 From: crusaderky Date: Wed, 29 Mar 2023 13:05:54 +0100 Subject: [PATCH] Thread-safe Buffer.__items__; proper KeysView, ItemsView, ValuesView (#93) --- .coveragerc | 17 ++++++++++++ doc/source/changelog.rst | 4 ++- zict/buffer.py | 53 +++++++++++++++++++++++++++++++------- zict/cache.py | 8 +----- zict/file.py | 5 +--- zict/func.py | 12 +-------- zict/lmdb.py | 54 ++++++++++++++++++++++++++++++--------- zict/sieve.py | 13 +--------- zict/tests/test_buffer.py | 14 ++++++++++ zict/tests/test_cache.py | 10 ++++++++ zict/tests/test_lru.py | 2 +- zict/tests/test_zip.py | 49 +++++++++++++++++++++++++++-------- zict/tests/utils_test.py | 18 ++++++++++++- zict/zip.py | 14 ++-------- 14 files changed, 193 insertions(+), 80 deletions(-) create mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..2cfaa12 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,17 @@ +[run] +source = + zict +omit = + +[report] +show_missing = True +exclude_lines = + # re-enable the standard pragma + pragma: nocover + pragma: no cover + # always ignore type checking blocks + TYPE_CHECKING + @overload + +[html] +directory = coverage_html_report diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst index 01ec609..b6a7ddc 100644 --- a/doc/source/changelog.rst +++ b/doc/source/changelog.rst @@ -15,10 +15,12 @@ Changelog - ``LMDB`` now uses memory-mapped I/O on MacOSX and is usable on Windows. (:pr:`78`) `Guido Imperiale`_ - The library is now partially thread-safe. - (:pr:`82`, :pr:`90`) `Guido Imperiale`_ + (:pr:`82`, :pr:`90`, :pr:`93`) `Guido Imperiale`_ - :class:`LRU` and :class:`Buffer` now support delayed eviction. New objects :class:`Accumulator` and :class:`InsertionSortedSet`. (:pr:`87`) `Guido Imperiale`_ +- All mappings now return proper KeysView, ItemsView, and ValuesView objects from their + keys(), items(), and values() methods (:pr:`93`) `Guido Imperiale`_ 2.2.0 - 2022-04-28 diff --git a/zict/buffer.py b/zict/buffer.py index 140aabb..33c9afb 100644 --- a/zict/buffer.py +++ b/zict/buffer.py @@ -2,6 +2,10 @@ from collections.abc import Callable, Iterator, MutableMapping from itertools import chain +from typing import ( # TODO import from collections.abc (needs Python >=3.9) + ItemsView, + ValuesView, +) from zict.common import KT, VT, ZictBase, close, flush from zict.lru import LRU @@ -136,21 +140,30 @@ def __delitem__(self, key: KT) -> None: except KeyError: del self.slow[key] - # FIXME dictionary views https://github.com/dask/zict/issues/61 - def keys(self) -> Iterator[KT]: # type: ignore - return iter(self) + def values(self) -> ValuesView[VT]: + return BufferValuesView(self) - def values(self) -> Iterator[VT]: # type: ignore - return chain(self.fast.values(), self.slow.values()) - - def items(self) -> Iterator[tuple[KT, VT]]: # type: ignore - return chain(self.fast.items(), self.slow.items()) + def items(self) -> ItemsView[KT, VT]: + return BufferItemsView(self) def __len__(self) -> int: return len(self.fast) + len(self.slow) def __iter__(self) -> Iterator[KT]: - return chain(self.fast, self.slow) + """Make sure that the iteration is not disrupted if you evict/restore a key in + the middle of it + """ + seen = set() + while True: + try: + for d in (self.fast, self.slow): + for key in d: + if key not in seen: + seen.add(key) + yield key + return + except RuntimeError: + pass def __contains__(self, key: object) -> bool: return key in self.fast or key in self.slow @@ -165,3 +178,25 @@ def flush(self) -> None: def close(self) -> None: close(self.fast, self.slow) + + +class BufferItemsView(ItemsView[KT, VT]): + _mapping: Buffer # FIXME CPython implementation detail + __slots__ = () + + def __iter__(self) -> Iterator[tuple[KT, VT]]: + # Avoid changing the LRU + return chain(self._mapping.fast.items(), self._mapping.slow.items()) + + +class BufferValuesView(ValuesView[VT]): + _mapping: Buffer # FIXME CPython implementation detail + __slots__ = () + + def __contains__(self, value: object) -> bool: + # Avoid changing the LRU + return any(value == v for v in self) + + def __iter__(self) -> Iterator[VT]: + # Avoid changing the LRU + return chain(self._mapping.fast.values(), self._mapping.slow.values()) diff --git a/zict/cache.py b/zict/cache.py index 18f4363..5489569 100644 --- a/zict/cache.py +++ b/zict/cache.py @@ -1,7 +1,7 @@ from __future__ import annotations import weakref -from collections.abc import Iterator, KeysView, MutableMapping +from collections.abc import Iterator, MutableMapping from typing import TYPE_CHECKING from zict.common import KT, VT, ZictBase, close, flush @@ -67,7 +67,6 @@ def __getitem__(self, key: KT) -> VT: def __setitem__(self, key: KT, value: VT) -> None: # If the item was already in cache and data.__setitem__ fails, e.g. because it's # a File and the disk is full, make sure that the cache is invalidated. - # FIXME https://github.com/python/mypy/issues/10152 try: del self.cache[key] except KeyError: @@ -94,11 +93,6 @@ def __contains__(self, key: object) -> bool: # Do not let MutableMapping call self.data[key] return key in self.data - def keys(self) -> KeysView[KT]: - # Return a potentially optimized set-like, instead of letting MutableMapping - # build it from __iter__ on the fly - return self.data.keys() - def flush(self) -> None: flush(self.cache, self.data) diff --git a/zict/file.py b/zict/file.py index 2c419e2..9ee1652 100644 --- a/zict/file.py +++ b/zict/file.py @@ -3,7 +3,7 @@ import mmap import os import pathlib -from collections.abc import Iterator, KeysView +from collections.abc import Iterator from urllib.parse import quote, unquote from zict.common import ZictBase @@ -139,9 +139,6 @@ def __setitem__( def __contains__(self, key: object) -> bool: return key in self.filenames - def keys(self) -> KeysView[str]: - return self.filenames.keys() - def __iter__(self) -> Iterator[str]: return iter(self.filenames) diff --git a/zict/func.py b/zict/func.py index 8b1d512..33d24a0 100644 --- a/zict/func.py +++ b/zict/func.py @@ -1,6 +1,6 @@ from __future__ import annotations -from collections.abc import Callable, Iterable, Iterator, KeysView, MutableMapping +from collections.abc import Callable, Iterable, Iterator, MutableMapping from typing import Generic, TypeVar from zict.common import KT, VT, ZictBase, close, flush @@ -71,16 +71,6 @@ def __contains__(self, key: object) -> bool: def __delitem__(self, key: KT) -> None: del self.d[key] - def keys(self) -> KeysView[KT]: - return self.d.keys() - - # FIXME dictionary views https://github.com/dask/zict/issues/61 - def values(self) -> Iterator[VT]: # type: ignore - return (self.load(v) for v in self.d.values()) - - def items(self) -> Iterator[tuple[KT, VT]]: # type: ignore - return ((k, self.load(v)) for k, v in self.d.items()) - def _do_update(self, items: Iterable[tuple[KT, VT]]) -> None: it = ((k, self.dump(v)) for k, v in items) self.d.update(it) diff --git a/zict/lmdb.py b/zict/lmdb.py index 2a12d74..01f80fe 100644 --- a/zict/lmdb.py +++ b/zict/lmdb.py @@ -3,6 +3,10 @@ import pathlib import sys from collections.abc import Iterable, Iterator +from typing import ( # TODO import from collections.abc (needs Python >=3.9) + ItemsView, + ValuesView, +) from zict.common import ZictBase @@ -77,18 +81,15 @@ def __contains__(self, key: object) -> bool: with self.db.begin() as txn: return txn.cursor().set_key(_encode_key(key)) - # FIXME dictionary views https://github.com/dask/zict/issues/61 - def items(self) -> Iterator[tuple[str, bytes]]: # type: ignore - cursor = self.db.begin().cursor() - return ((_decode_key(k), v) for k, v in cursor.iternext(keys=True, values=True)) - - def keys(self) -> Iterator[str]: # type: ignore + def __iter__(self) -> Iterator[str]: cursor = self.db.begin().cursor() return (_decode_key(k) for k in cursor.iternext(keys=True, values=False)) - def values(self) -> Iterator[bytes]: # type: ignore - cursor = self.db.begin().cursor() - return cursor.iternext(keys=False, values=True) + def items(self) -> ItemsView[str, bytes]: + return LMDBItemsView(self) + + def values(self) -> ValuesView[bytes]: + return LMDBValuesView(self) def _do_update(self, items: Iterable[tuple[str, bytes]]) -> None: # Optimized version of update() using a single putmulti() call. @@ -97,9 +98,6 @@ def _do_update(self, items: Iterable[tuple[str, bytes]]) -> None: consumed, added = txn.cursor().putmulti(items_enc) assert consumed == added == len(items_enc) - def __iter__(self) -> Iterator[str]: - return self.keys() - def __delitem__(self, key: str) -> None: with self.db.begin(write=True) as txn: if not txn.delete(_encode_key(key)): @@ -110,3 +108,35 @@ def __len__(self) -> int: def close(self) -> None: self.db.close() + + +class LMDBItemsView(ItemsView[str, bytes]): + _mapping: LMDB # FIXME CPython implementation detail + __slots__ = () + + def __contains__(self, item: object) -> bool: + key: str + value: object + key, value = item # type: ignore + try: + v = self._mapping[key] + except (KeyError, AttributeError): + return False + else: + return v == value + + def __iter__(self) -> Iterator[tuple[str, bytes]]: + cursor = self._mapping.db.begin().cursor() + return ((_decode_key(k), v) for k, v in cursor.iternext(keys=True, values=True)) + + +class LMDBValuesView(ValuesView[bytes]): + _mapping: LMDB # FIXME CPython implementation detail + __slots__ = () + + def __contains__(self, value: object) -> bool: + return any(value == v for v in self) + + def __iter__(self) -> Iterator[bytes]: + cursor = self._mapping.db.begin().cursor() + return cursor.iternext(keys=False, values=True) diff --git a/zict/sieve.py b/zict/sieve.py index 78f20c0..44ff845 100644 --- a/zict/sieve.py +++ b/zict/sieve.py @@ -2,7 +2,6 @@ from collections import defaultdict from collections.abc import Callable, Iterable, Iterator, Mapping, MutableMapping -from itertools import chain from typing import Generic, TypeVar from zict.common import KT, VT, ZictBase, close, flush @@ -92,21 +91,11 @@ def _do_update(self, items: Iterable[tuple[KT, VT]]) -> None: for key, _ in mitems: self.key_to_mapping[key] = mapping - # FIXME dictionary views https://github.com/dask/zict/issues/61 - def keys(self) -> Iterator[KT]: # type: ignore - return chain.from_iterable(self.mappings.values()) - - def values(self) -> Iterator[VT]: # type: ignore - return chain.from_iterable(m.values() for m in self.mappings.values()) - - def items(self) -> Iterator[tuple[KT, VT]]: # type: ignore - return chain.from_iterable(m.items() for m in self.mappings.values()) - def __len__(self) -> int: return sum(map(len, self.mappings.values())) def __iter__(self) -> Iterator[KT]: - return self.keys() + return iter(self.key_to_mapping) def __contains__(self, key: object) -> bool: return key in self.key_to_mapping diff --git a/zict/tests/test_buffer.py b/zict/tests/test_buffer.py index 7ba346a..253658b 100644 --- a/zict/tests/test_buffer.py +++ b/zict/tests/test_buffer.py @@ -228,3 +228,17 @@ def test_set_noevict(): assert a == {"y": 3, "x": 1} assert b == {"z": 6} assert f2s == s2f == [] + + +def test_evict_restore_during_iter(): + """Test that __iter__ won't be disrupted if another thread evicts or restores a key""" + buff = Buffer({"x": 1, "y": 2}, {"z": 3}, n=5) + assert list(buff) == ["x", "y", "z"] + it = iter(buff) + assert next(it) == "x" + buff.fast.evict("x") + assert next(it) == "y" + assert buff["x"] == 1 + assert next(it) == "z" + with pytest.raises(StopIteration): + next(it) diff --git a/zict/tests/test_cache.py b/zict/tests/test_cache.py index 1feb181..57569df 100644 --- a/zict/tests/test_cache.py +++ b/zict/tests/test_cache.py @@ -4,6 +4,7 @@ import pytest from zict import Cache, WeakValueMapping +from zict.tests import utils_test def test_cache_get_set_del(): @@ -129,3 +130,12 @@ class C: b = "bbb" d["b"] = b assert "b" not in d + + +def test_mapping(): + """ + Test mapping interface for Cache(). + """ + buff = Cache({}, {}) + utils_test.check_mapping(buff) + utils_test.check_closing(buff) diff --git a/zict/tests/test_lru.py b/zict/tests/test_lru.py index 2cfe323..57344c4 100644 --- a/zict/tests/test_lru.py +++ b/zict/tests/test_lru.py @@ -33,7 +33,7 @@ def test_simple(): assert "y" not in lru lru["a"] = 5 - assert set(lru.keys()) == {"z", "a"} + assert set(lru) == {"z", "a"} def test_str(): diff --git a/zict/tests/test_zip.py b/zict/tests/test_zip.py index 5ae1753..4093fb2 100644 --- a/zict/tests/test_zip.py +++ b/zict/tests/test_zip.py @@ -1,22 +1,15 @@ -import os import zipfile from collections.abc import MutableMapping import pytest from zict import Zip +from zict.tests import utils_test @pytest.fixture -def fn(): - filename = ".tmp.zip" - if os.path.exists(filename): - os.remove(filename) - - yield filename - - if os.path.exists(filename): - os.remove(filename) +def fn(tmp_path): + yield tmp_path / "tmp.zip" def test_simple(fn): @@ -83,3 +76,39 @@ def test_bytearray(fn): with Zip(fn) as z: assert z["x"] == b"123" + + +def check_mapping(z): + """Shorter version of utils_test.check_mapping, as zip supports neither update nor + delete + """ + assert isinstance(z, MutableMapping) + assert not z + + assert list(z) == list(z.keys()) == [] + assert list(z.values()) == [] + assert list(z.items()) == [] + assert len(z) == 0 + + z["abc"] = b"456" + z["xyz"] = b"12" + assert len(z) == 2 + assert z["abc"] == b"456" + + utils_test.check_items(z, [("abc", b"456"), ("xyz", b"12")]) + + assert "abc" in z + assert "xyz" in z + assert "def" not in z + + with pytest.raises(KeyError): + z["def"] + + +def test_mapping(fn): + """ + Test mapping interface for Zip(). + """ + with Zip(fn) as z: + check_mapping(z) + utils_test.check_closing(z) diff --git a/zict/tests/utils_test.py b/zict/tests/utils_test.py index 549414b..bd5c777 100644 --- a/zict/tests/utils_test.py +++ b/zict/tests/utils_test.py @@ -1,6 +1,6 @@ import random import string -from collections.abc import MutableMapping +from collections.abc import ItemsView, KeysView, MutableMapping, ValuesView import pytest @@ -34,6 +34,19 @@ def check_items(z, expected_items): assert list(z.values()) == [v for k, v in items] assert list(z) == [k for k, v in items] + # ItemsView, KeysView, ValuesView.__contains__() + assert isinstance(z.keys(), KeysView) + assert isinstance(z.values(), ValuesView) + assert isinstance(z.items(), ItemsView) + assert items[0] in z.items() + assert items[0][0] in z.keys() + assert items[0][0] in z + assert items[0][1] in z.values() + assert (object(), object()) not in z.items() + assert object() not in z.keys() + assert object() not in z + assert object() not in z.values() + def stress_test_mapping_updates(z): # Certain mappings shuffle between several underlying stores @@ -67,6 +80,9 @@ def stress_test_mapping_updates(z): def check_mapping(z): + """See also test_zip.check_mapping""" + assert type(z).__name__ in str(z) + assert type(z).__name__ in repr(z) assert isinstance(z, MutableMapping) assert not z diff --git a/zict/zip.py b/zict/zip.py index 9ae73c3..d2902d4 100644 --- a/zict/zip.py +++ b/zict/zip.py @@ -59,20 +59,10 @@ def __getitem__(self, key: str) -> bytes: def __setitem__(self, key: str, value: bytes) -> None: self.file.writestr(key, value) - # FIXME dictionary views https://github.com/dask/zict/issues/61 - def keys(self) -> Iterator[str]: # type: ignore - return (zi.filename for zi in self.file.filelist) - - def values(self) -> Iterator[bytes]: # type: ignore - return (self.file.read(key) for key in self.keys()) - - def items(self) -> Iterator[tuple[str, bytes]]: # type: ignore - return ((zi.filename, self.file.read(zi.filename)) for zi in self.file.filelist) - def __iter__(self) -> Iterator[str]: - return self.keys() + return (zi.filename for zi in self.file.filelist) - def __delitem__(self, key: str) -> None: + def __delitem__(self, key: str) -> None: # pragma: nocover raise NotImplementedError("Not supported by stdlib zipfile") def __len__(self) -> int: