Skip to content

Commit

Permalink
Thread-safe Buffer.__items__; proper KeysView, ItemsView, ValuesView (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
crusaderky authored Mar 29, 2023
1 parent adaf758 commit 9f62731
Show file tree
Hide file tree
Showing 14 changed files with 193 additions and 80 deletions.
17 changes: 17 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[run]
source =
zict
omit =

[report]
show_missing = True
exclude_lines =
# re-enable the standard pragma
pragma: nocover
pragma: no cover
# always ignore type checking blocks
TYPE_CHECKING
@overload

[html]
directory = coverage_html_report
4 changes: 3 additions & 1 deletion doc/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ Changelog
- ``LMDB`` now uses memory-mapped I/O on MacOSX and is usable on Windows.
(:pr:`78`) `Guido Imperiale`_
- The library is now partially thread-safe.
(:pr:`82`, :pr:`90`) `Guido Imperiale`_
(:pr:`82`, :pr:`90`, :pr:`93`) `Guido Imperiale`_
- :class:`LRU` and :class:`Buffer` now support delayed eviction.
New objects :class:`Accumulator` and :class:`InsertionSortedSet`.
(:pr:`87`) `Guido Imperiale`_
- All mappings now return proper KeysView, ItemsView, and ValuesView objects from their
keys(), items(), and values() methods (:pr:`93`) `Guido Imperiale`_


2.2.0 - 2022-04-28
Expand Down
53 changes: 44 additions & 9 deletions zict/buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

from collections.abc import Callable, Iterator, MutableMapping
from itertools import chain
from typing import ( # TODO import from collections.abc (needs Python >=3.9)
ItemsView,
ValuesView,
)

from zict.common import KT, VT, ZictBase, close, flush
from zict.lru import LRU
Expand Down Expand Up @@ -136,21 +140,30 @@ def __delitem__(self, key: KT) -> None:
except KeyError:
del self.slow[key]

# FIXME dictionary views https://github.com/dask/zict/issues/61
def keys(self) -> Iterator[KT]: # type: ignore
return iter(self)
def values(self) -> ValuesView[VT]:
return BufferValuesView(self)

def values(self) -> Iterator[VT]: # type: ignore
return chain(self.fast.values(), self.slow.values())

def items(self) -> Iterator[tuple[KT, VT]]: # type: ignore
return chain(self.fast.items(), self.slow.items())
def items(self) -> ItemsView[KT, VT]:
return BufferItemsView(self)

def __len__(self) -> int:
return len(self.fast) + len(self.slow)

def __iter__(self) -> Iterator[KT]:
return chain(self.fast, self.slow)
"""Make sure that the iteration is not disrupted if you evict/restore a key in
the middle of it
"""
seen = set()
while True:
try:
for d in (self.fast, self.slow):
for key in d:
if key not in seen:
seen.add(key)
yield key
return
except RuntimeError:
pass

def __contains__(self, key: object) -> bool:
return key in self.fast or key in self.slow
Expand All @@ -165,3 +178,25 @@ def flush(self) -> None:

def close(self) -> None:
close(self.fast, self.slow)


class BufferItemsView(ItemsView[KT, VT]):
_mapping: Buffer # FIXME CPython implementation detail
__slots__ = ()

def __iter__(self) -> Iterator[tuple[KT, VT]]:
# Avoid changing the LRU
return chain(self._mapping.fast.items(), self._mapping.slow.items())


class BufferValuesView(ValuesView[VT]):
_mapping: Buffer # FIXME CPython implementation detail
__slots__ = ()

def __contains__(self, value: object) -> bool:
# Avoid changing the LRU
return any(value == v for v in self)

def __iter__(self) -> Iterator[VT]:
# Avoid changing the LRU
return chain(self._mapping.fast.values(), self._mapping.slow.values())
8 changes: 1 addition & 7 deletions zict/cache.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import weakref
from collections.abc import Iterator, KeysView, MutableMapping
from collections.abc import Iterator, MutableMapping
from typing import TYPE_CHECKING

from zict.common import KT, VT, ZictBase, close, flush
Expand Down Expand Up @@ -67,7 +67,6 @@ def __getitem__(self, key: KT) -> VT:
def __setitem__(self, key: KT, value: VT) -> None:
# If the item was already in cache and data.__setitem__ fails, e.g. because it's
# a File and the disk is full, make sure that the cache is invalidated.
# FIXME https://github.com/python/mypy/issues/10152
try:
del self.cache[key]
except KeyError:
Expand All @@ -94,11 +93,6 @@ def __contains__(self, key: object) -> bool:
# Do not let MutableMapping call self.data[key]
return key in self.data

def keys(self) -> KeysView[KT]:
# Return a potentially optimized set-like, instead of letting MutableMapping
# build it from __iter__ on the fly
return self.data.keys()

def flush(self) -> None:
flush(self.cache, self.data)

Expand Down
5 changes: 1 addition & 4 deletions zict/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import mmap
import os
import pathlib
from collections.abc import Iterator, KeysView
from collections.abc import Iterator
from urllib.parse import quote, unquote

from zict.common import ZictBase
Expand Down Expand Up @@ -139,9 +139,6 @@ def __setitem__(
def __contains__(self, key: object) -> bool:
return key in self.filenames

def keys(self) -> KeysView[str]:
return self.filenames.keys()

def __iter__(self) -> Iterator[str]:
return iter(self.filenames)

Expand Down
12 changes: 1 addition & 11 deletions zict/func.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from collections.abc import Callable, Iterable, Iterator, KeysView, MutableMapping
from collections.abc import Callable, Iterable, Iterator, MutableMapping
from typing import Generic, TypeVar

from zict.common import KT, VT, ZictBase, close, flush
Expand Down Expand Up @@ -71,16 +71,6 @@ def __contains__(self, key: object) -> bool:
def __delitem__(self, key: KT) -> None:
del self.d[key]

def keys(self) -> KeysView[KT]:
return self.d.keys()

# FIXME dictionary views https://github.com/dask/zict/issues/61
def values(self) -> Iterator[VT]: # type: ignore
return (self.load(v) for v in self.d.values())

def items(self) -> Iterator[tuple[KT, VT]]: # type: ignore
return ((k, self.load(v)) for k, v in self.d.items())

def _do_update(self, items: Iterable[tuple[KT, VT]]) -> None:
it = ((k, self.dump(v)) for k, v in items)
self.d.update(it)
Expand Down
54 changes: 42 additions & 12 deletions zict/lmdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
import pathlib
import sys
from collections.abc import Iterable, Iterator
from typing import ( # TODO import from collections.abc (needs Python >=3.9)
ItemsView,
ValuesView,
)

from zict.common import ZictBase

Expand Down Expand Up @@ -77,18 +81,15 @@ def __contains__(self, key: object) -> bool:
with self.db.begin() as txn:
return txn.cursor().set_key(_encode_key(key))

# FIXME dictionary views https://github.com/dask/zict/issues/61
def items(self) -> Iterator[tuple[str, bytes]]: # type: ignore
cursor = self.db.begin().cursor()
return ((_decode_key(k), v) for k, v in cursor.iternext(keys=True, values=True))

def keys(self) -> Iterator[str]: # type: ignore
def __iter__(self) -> Iterator[str]:
cursor = self.db.begin().cursor()
return (_decode_key(k) for k in cursor.iternext(keys=True, values=False))

def values(self) -> Iterator[bytes]: # type: ignore
cursor = self.db.begin().cursor()
return cursor.iternext(keys=False, values=True)
def items(self) -> ItemsView[str, bytes]:
return LMDBItemsView(self)

def values(self) -> ValuesView[bytes]:
return LMDBValuesView(self)

def _do_update(self, items: Iterable[tuple[str, bytes]]) -> None:
# Optimized version of update() using a single putmulti() call.
Expand All @@ -97,9 +98,6 @@ def _do_update(self, items: Iterable[tuple[str, bytes]]) -> None:
consumed, added = txn.cursor().putmulti(items_enc)
assert consumed == added == len(items_enc)

def __iter__(self) -> Iterator[str]:
return self.keys()

def __delitem__(self, key: str) -> None:
with self.db.begin(write=True) as txn:
if not txn.delete(_encode_key(key)):
Expand All @@ -110,3 +108,35 @@ def __len__(self) -> int:

def close(self) -> None:
self.db.close()


class LMDBItemsView(ItemsView[str, bytes]):
_mapping: LMDB # FIXME CPython implementation detail
__slots__ = ()

def __contains__(self, item: object) -> bool:
key: str
value: object
key, value = item # type: ignore
try:
v = self._mapping[key]
except (KeyError, AttributeError):
return False
else:
return v == value

def __iter__(self) -> Iterator[tuple[str, bytes]]:
cursor = self._mapping.db.begin().cursor()
return ((_decode_key(k), v) for k, v in cursor.iternext(keys=True, values=True))


class LMDBValuesView(ValuesView[bytes]):
_mapping: LMDB # FIXME CPython implementation detail
__slots__ = ()

def __contains__(self, value: object) -> bool:
return any(value == v for v in self)

def __iter__(self) -> Iterator[bytes]:
cursor = self._mapping.db.begin().cursor()
return cursor.iternext(keys=False, values=True)
13 changes: 1 addition & 12 deletions zict/sieve.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from collections import defaultdict
from collections.abc import Callable, Iterable, Iterator, Mapping, MutableMapping
from itertools import chain
from typing import Generic, TypeVar

from zict.common import KT, VT, ZictBase, close, flush
Expand Down Expand Up @@ -92,21 +91,11 @@ def _do_update(self, items: Iterable[tuple[KT, VT]]) -> None:
for key, _ in mitems:
self.key_to_mapping[key] = mapping

# FIXME dictionary views https://github.com/dask/zict/issues/61
def keys(self) -> Iterator[KT]: # type: ignore
return chain.from_iterable(self.mappings.values())

def values(self) -> Iterator[VT]: # type: ignore
return chain.from_iterable(m.values() for m in self.mappings.values())

def items(self) -> Iterator[tuple[KT, VT]]: # type: ignore
return chain.from_iterable(m.items() for m in self.mappings.values())

def __len__(self) -> int:
return sum(map(len, self.mappings.values()))

def __iter__(self) -> Iterator[KT]:
return self.keys()
return iter(self.key_to_mapping)

def __contains__(self, key: object) -> bool:
return key in self.key_to_mapping
Expand Down
14 changes: 14 additions & 0 deletions zict/tests/test_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,3 +228,17 @@ def test_set_noevict():
assert a == {"y": 3, "x": 1}
assert b == {"z": 6}
assert f2s == s2f == []


def test_evict_restore_during_iter():
"""Test that __iter__ won't be disrupted if another thread evicts or restores a key"""
buff = Buffer({"x": 1, "y": 2}, {"z": 3}, n=5)
assert list(buff) == ["x", "y", "z"]
it = iter(buff)
assert next(it) == "x"
buff.fast.evict("x")
assert next(it) == "y"
assert buff["x"] == 1
assert next(it) == "z"
with pytest.raises(StopIteration):
next(it)
10 changes: 10 additions & 0 deletions zict/tests/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest

from zict import Cache, WeakValueMapping
from zict.tests import utils_test


def test_cache_get_set_del():
Expand Down Expand Up @@ -129,3 +130,12 @@ class C:
b = "bbb"
d["b"] = b
assert "b" not in d


def test_mapping():
"""
Test mapping interface for Cache().
"""
buff = Cache({}, {})
utils_test.check_mapping(buff)
utils_test.check_closing(buff)
2 changes: 1 addition & 1 deletion zict/tests/test_lru.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_simple():
assert "y" not in lru

lru["a"] = 5
assert set(lru.keys()) == {"z", "a"}
assert set(lru) == {"z", "a"}


def test_str():
Expand Down
Loading

0 comments on commit 9f62731

Please sign in to comment.