Merge pull request #216 from capitalone/develop
Release v0.10.2
Faisal authored Jun 19, 2023
2 parents b825618 + d0c0ef3 commit a2293f4
Showing 8 changed files with 44 additions and 21 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test-package.yml
@@ -17,7 +17,7 @@ jobs:
       fail-fast: false
       matrix:
         python-version: [3.8, 3.9, '3.10']
-        spark-version: [3.0.3, 3.1.3, 3.2.3, 3.3.1]
+        spark-version: [3.0.3, 3.1.3, 3.2.3, 3.3.1, 3.4.0]
     env:
       PYTHON_VERSION: ${{ matrix.python-version }}
       SPARK_VERSION: ${{ matrix.spark-version }}
2 changes: 1 addition & 1 deletion datacompy/__init__.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-__version__ = "0.10.1"
+__version__ = "0.10.2"

 from datacompy.core import *
 from datacompy.fugue import is_match, report
8 changes: 7 additions & 1 deletion datacompy/core.py
@@ -803,7 +803,13 @@ def compare_string_and_date_columns(col_1, col_2):
             | (obj_column.isnull() & date_column.isnull())
         )
     except:
-        return pd.Series(False, index=col_1.index)
+        try:
+            return pd.Series(
+                (pd.to_datetime(obj_column, format="mixed") == date_column)
+                | (obj_column.isnull() & date_column.isnull())
+            )
+        except:
+            return pd.Series(False, index=col_1.index)


 def get_merged_columns(original_df, merged_df, suffix):
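This core.py change works around pandas 2's stricter datetime parsing: pd.to_datetime now infers one format from the first element and raises a ValueError when later strings use a different format, unless format="mixed" is passed (a keyword pandas 1.x does not accept). A minimal sketch of the same fallback pattern, with a hypothetical helper name used only for illustration:

    import pandas as pd

    def to_datetime_compat(obj_column: pd.Series) -> pd.Series:
        # First attempt mirrors the pre-existing behaviour: it works on pandas 1.x,
        # and on pandas 2.x whenever all strings share a single format.
        try:
            return pd.to_datetime(obj_column)
        except ValueError:
            # pandas >= 2.0 only: parse each element independently.
            return pd.to_datetime(obj_column, format="mixed")

    # Mixed date and datetime strings, as in the tests further down.
    to_datetime_compat(pd.Series(["2017-01-01", "2017-01-02 00:00:00"]))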
4 changes: 2 additions & 2 deletions docs/source/pandas_usage.rst
@@ -270,8 +270,8 @@ There's a number of limitations with ``datacompy``:

 .. code-block:: python

-    from pandas.util.testing import assert_series_equal
-    from pandas.util.testing import assert_frame_equal
+    from pandas.testing import assert_series_equal
+    from pandas.testing import assert_frame_equal
     import numpy.testing as npt

     #Compare two series
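The documentation fix tracks the removal of the private pandas.util.testing module in pandas 2.0; pandas.testing is the public location for the same helpers and works on pandas 1.x as well. A short example of the updated imports in use:

    import pandas as pd
    from pandas.testing import assert_frame_equal, assert_series_equal

    # Both helpers raise AssertionError with a detailed diff on mismatch.
    assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 3]))
    assert_frame_equal(pd.DataFrame({"a": [1.0]}), pd.DataFrame({"a": [1.0]}))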
10 changes: 4 additions & 6 deletions setup.cfg
@@ -32,10 +32,10 @@ zip_safe = False
 include_package_data = True
 packages = find:
 install_requires =
-    pandas<=1.5.3,>=0.25.0
-    numpy<=1.24.2,>=1.22.0
+    pandas<=2.0.2,>=0.25.0
+    numpy<=1.24.3,>=1.22.0
     ordered-set<=4.1.0,>=4.0.2
-    fugue<=0.8.4,>=0.8.4
+    fugue<=0.8.5,>=0.8.5

 [options.package_data]
 * = templates/*
@@ -47,7 +47,6 @@ polars =
     fugue[polars]
 spark =
     fugue[spark]
-    cloudpickle
 dask =
     fugue[dask]
 ray =
@@ -101,8 +100,7 @@ python_version = 3.9
 conda_install =
     openjdk=8
 extras =
-    tests
-    spark
+    dev
 command =
     pytest tests -m 'not integration'
 upgrade =
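The loosened pins admit pandas up to 2.0.2, numpy up to 1.24.3, and fugue 0.8.5, and the test-environment section at the bottom now requests a single dev extra in place of tests and spark. A quick, optional way to confirm which versions a given environment actually resolved:

    from importlib.metadata import version

    for pkg in ("pandas", "numpy", "fugue", "ordered-set", "datacompy"):
        print(pkg, version(pkg))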
25 changes: 19 additions & 6 deletions tests/test_core.py
@@ -26,7 +26,7 @@
 import numpy as np
 import pandas as pd
 import pytest
-from pandas.util.testing import assert_series_equal
+from pandas.testing import assert_series_equal
 from pytest import raises

 import datacompy
@@ -167,8 +167,13 @@ def test_date_columns_equal_with_ignore_spaces():
     assert_series_equal(expect_out, actual_out, check_names=False)

     # Then compare converted to datetime objects
-    df["a"] = pd.to_datetime(df["a"])
-    df["b"] = pd.to_datetime(df["b"])
+    try:
+        df["a"] = pd.to_datetime(df["a"], format="mixed")
+        df["b"] = pd.to_datetime(df["b"], format="mixed")
+    except ValueError:
+        df["a"] = pd.to_datetime(df["a"])
+        df["b"] = pd.to_datetime(df["b"])
+
     actual_out = datacompy.columns_equal(df.a, df.b, rel_tol=0.2, ignore_spaces=True)
     expect_out = df["expected"]
     assert_series_equal(expect_out, actual_out, check_names=False)
@@ -196,8 +201,13 @@ def test_date_columns_equal_with_ignore_spaces_and_case():
     assert_series_equal(expect_out, actual_out, check_names=False)

     # Then compare converted to datetime objects
-    df["a"] = pd.to_datetime(df["a"])
-    df["b"] = pd.to_datetime(df["b"])
+    try:
+        df["a"] = pd.to_datetime(df["a"], format="mixed")
+        df["b"] = pd.to_datetime(df["b"], format="mixed")
+    except ValueError:
+        df["a"] = pd.to_datetime(df["a"])
+        df["b"] = pd.to_datetime(df["b"])
+
     actual_out = datacompy.columns_equal(df.a, df.b, rel_tol=0.2, ignore_spaces=True)
     expect_out = df["expected"]
     assert_series_equal(expect_out, actual_out, check_names=False)
@@ -246,7 +256,10 @@ def test_rounded_date_columns():
             {"a": "2017-01-01", "b": "2017-01-01 00:00:00", "exp": True},
         ]
     )
-    df["a_dt"] = pd.to_datetime(df["a"])
+    try:
+        df["a_dt"] = pd.to_datetime(df["a"], format="mixed")
+    except ValueError:
+        df["a_dt"] = pd.to_datetime(df["a"])
     actual = datacompy.columns_equal(df.a_dt, df.b)
     expected = df["exp"]
     assert_series_equal(actual, expected, check_names=False)
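These test changes wrap pd.to_datetime in try/except ValueError so the suite runs under both pandas 1.x (which rejects format="mixed") and pandas 2.x (which needs it for mixed-format strings, per the core.py note above); the assertions themselves are untouched. For context, a minimal sketch of the string-versus-datetime comparison they exercise, with values echoing test_rounded_date_columns:

    import pandas as pd
    import datacompy

    df = pd.DataFrame(
        {
            "a": ["2017-01-01", "2017-01-02"],
            "b": ["2017-01-01 00:00:00", "2017-01-03 00:00:00"],
        }
    )
    df["a_dt"] = pd.to_datetime(df["a"])

    # A date is treated as equal to its midnight timestamp, so the first pair
    # matches and the second does not; expected result: [True, False].
    datacompy.columns_equal(df.a_dt, df.b)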
3 changes: 3 additions & 0 deletions tests/test_fugue.py
@@ -132,6 +132,7 @@ def test_is_match_spark(
     space_df,
     upper_col_df,
 ):
+    ref_df.iteritems = ref_df.items  # pandas 2 compatibility
     rdf = spark_session.createDataFrame(ref_df)

     assert is_match(rdf, shuffle_df, join_columns="a")
@@ -263,6 +264,8 @@ def test_report_pandas(simple_diff_df1, simple_diff_df2):


 def test_report_spark(spark_session, simple_diff_df1, simple_diff_df2):
+    simple_diff_df1.iteritems = simple_diff_df1.items  # pandas 2 compatibility
+    simple_diff_df2.iteritems = simple_diff_df2.items  # pandas 2 compatibility
     df1 = spark_session.createDataFrame(simple_diff_df1)
     df2 = spark_session.createDataFrame(simple_diff_df2)
     comp = Compare(simple_diff_df1, simple_diff_df2, join_columns="aa")
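The iteritems assignments are a pandas 2 shim: DataFrame.iteritems was removed in pandas 2.0, while PySpark releases that predate pandas 2 support still call it inside createDataFrame when converting a pandas DataFrame. A minimal sketch of the same workaround, guarded so it is a no-op on pandas 1.x; the Spark lines assume a local PySpark install and are left commented out:

    import pandas as pd
    # from pyspark.sql import SparkSession

    pdf = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})

    # Restore the removed alias only when it is actually missing (pandas >= 2.0).
    if not hasattr(pdf, "iteritems"):
        pdf.iteritems = pdf.items

    # spark = SparkSession.builder.master("local[2]").getOrCreate()
    # sdf = spark.createDataFrame(pdf)  # needs the alias on older Spark with pandas 2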
11 changes: 7 additions & 4 deletions tests/test_spark.py
@@ -45,7 +45,12 @@
 # (if we need to use these in other modules, move to conftest.py)
 @pytest.fixture(scope="module", name="spark")
 def spark_fixture():
-    spark = SparkSession.builder.master("local[2]").config("spark.driver.bindAddress", "127.0.0.1").appName("pytest").getOrCreate()
+    spark = (
+        SparkSession.builder.master("local[2]")
+        .config("spark.driver.bindAddress", "127.0.0.1")
+        .appName("pytest")
+        .getOrCreate()
+    )
     yield spark
     spark.stop()

@@ -2076,9 +2081,7 @@ def text_alignment_validator(
                 break

         if not match:
-            raise AssertionError(
-                f'Did not find a match for line: "{line}"'
-            )
+            raise AssertionError(f'Did not find a match for line: "{line}"')

         for n in left_indices:
             assert match_positions[n] == match.start(n)
