Skip to content

Commit

Permalink
Merge pull request #1 from ethho/release/v0.3.0
Browse files Browse the repository at this point in the history
Version 0.3.0
  • Loading branch information
ethho authored May 12, 2023
2 parents bef633b + e644516 commit 4e6b097
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 49 deletions.
46 changes: 14 additions & 32 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "memoize"
version = "0.2.1"
version = "0.3.0"
description = "Python3 memoization decorator"
authors = ["Ethan Ho <[email protected]>"]
license = "MIT"
Expand Down
30 changes: 20 additions & 10 deletions src/memoize/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,34 @@

from .main import _clean_func_name, _get_hist_fps, _make_key


def _read(ext: str, fp: str) -> pd.DataFrame:
    """Read a cached DataFrame from the file at `fp`.

    Args:
        ext: Storage format of the file, either 'csv' or 'parquet'.
        fp: Path of the file to read.

    Returns:
        The DataFrame loaded by `pd.read_csv` or `pd.read_parquet`.

    Raises:
        ValueError: If `ext` is neither 'csv' nor 'parquet'.
    """
    if ext == 'csv':
        return pd.read_csv(fp)
    if ext == 'parquet':
        return pd.read_parquet(fp)
    # ValueError subclasses Exception, so callers catching Exception still work.
    raise ValueError(f"Unsupported file extension {ext}")


def _write(ext: str, fp: str, df: pd.DataFrame):
    """Write DataFrame `df` to the file at `fp` in the format named by `ext`.

    Args:
        ext: Storage format to write, either 'csv' or 'parquet'.
        fp: Path of the file to write.
        df: DataFrame to store. For 'parquet', if the column labels are not
            of object dtype they are converted to strings IN PLACE, so the
            caller's DataFrame is modified as a side effect.

    Returns:
        Whatever `DataFrame.to_csv` / `DataFrame.to_parquet` returns for a
        path target.

    Raises:
        ValueError: If `ext` is neither 'csv' nor 'parquet'.
    """
    if ext == 'csv':
        # Only persist the index when it carries a name; an unnamed index
        # is left out of the file.
        write_index = bool(df.index.name)
        return df.to_csv(fp, index=write_index)
    if ext == 'parquet':
        if not pd.api.types.is_object_dtype(df.columns.dtype):
            print("WARNING: Converting column names to string dtype")
            # Deliberately mutates `df` rather than a copy, so the frame the
            # caller holds has the same column labels as the stored one.
            df.columns = df.columns.astype(str)
        return df.to_parquet(fp)
    # ValueError subclasses Exception, so callers catching Exception still work.
    raise ValueError(f"Unsupported file extension {ext}")


def memoize_df(
stub: Optional[str] = None,
cache_dir: Optional[str] = '/tmp/memoize',
ext: str = 'csv',
log_func: Callable = print,
ignore_invalid: bool = True,
cache_lifetime_days: int = 0
) -> Callable:
"""
Expand Down Expand Up @@ -61,7 +71,7 @@ def memoize_dec(*args, **kwargs):
if not kwargs.get('_memoize_force_refresh'):
for hist_fp in hist_fps:
log_func(f"Using cached call from {hist_fp}")
return _read_csv(hist_fp)
return _read(ext, hist_fp)

# Else run the function and store cached result
result = func(*args, **kwargs)
Expand All @@ -71,7 +81,7 @@ def memoize_dec(*args, **kwargs):
f"Failed to write return value of function '{funcname}' to CSV file. "
f"Expected a pandas.DataFrame, received {type(result)}."
)
_write_csv(fp, result)
_write(ext, fp, result)
return result
return memoize_dec
return add_memoize_dec
Expand Down
8 changes: 4 additions & 4 deletions src/memoize/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def _make_key(func_name: str, args: List, kwargs: Dict, maxlen: int = None) -> s
d['_func_name'] = func_name
d['_args'] = args
hl = hashlib.new('sha256')
hl.update(json.dumps(d, sort_keys=True).encode())
hl.update(json.dumps(d, sort_keys=True).encode())
as_str = hl.hexdigest()
if maxlen:
as_str = as_str[:maxlen]
Expand Down Expand Up @@ -48,7 +48,7 @@ def _get_hist_fps(query: str, cache_lifetime_days: int = None) -> List[str]:
except Exception as err:
raise
dt_grps.append(item)

fps = [
file['fp'] for file in
# Sort filepaths starting with most recent
Expand Down Expand Up @@ -92,7 +92,7 @@ def memoize(stub: Optional[str] = None,
raise Exception(f'{cache_dir=} exists but is not a directory')
else:
os.makedirs(cache_dir)
stub = stub if stub else date.today().strftime('%Y%m%d')
stub = stub if stub else date.today().strftime('%Y%m%d')

def add_memoize_dec(func):
funcname = _clean_func_name(func.__name__)
Expand All @@ -118,7 +118,7 @@ def memoize_dec(*args, **kwargs):
text = json.dumps(cache)
f.write(text)
return cache[key]

# Else run the function and store cached result
result = func(*args, **kwargs)
cache[key] = result
Expand Down
5 changes: 3 additions & 2 deletions tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ def example_func(foo: int):
return df


def test_memoize():
wrapped = memoize_df(cache_lifetime_days=None)(example_func)
@pytest.mark.parametrize('ext', ['csv', 'parquet'])
def test_memoize(ext):
wrapped = memoize_df(cache_lifetime_days=None, ext=ext)(example_func)
print(wrapped(2))
print(wrapped(3))
print(wrapped(5))
Expand Down

0 comments on commit 4e6b097

Please sign in to comment.