Skip to content

Commit

Permalink
Format and docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
jdawang committed Dec 7, 2023
1 parent bfcb1cb commit e512803
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 27 deletions.
1 change: 1 addition & 0 deletions tests/test_fugue/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import numpy as np
import pandas as pd


@pytest.fixture
def ref_df():
np.random.seed(0)
Expand Down
15 changes: 15 additions & 0 deletions tests/test_fugue/test_duckdb.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
#
# Copyright 2020 Capital One Services, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test fugue functionality with duckdb."""
import pytest
from ordered_set import OrderedSet
from pytest import raises
Expand Down
5 changes: 0 additions & 5 deletions tests/test_fugue/test_fuge_helpers.py

This file was deleted.

22 changes: 22 additions & 0 deletions tests/test_fugue/test_fugue_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#
# Copyright 2020 Capital One Services, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""One test helper for fugue reports."""

def _compare_report(expected, actual, truncate=False):
"""Compare datacompy reports."""
if truncate:
expected = expected.split("Sample Rows", 1)[0]
actual = actual.split("Sample Rows", 1)[0]
assert expected == actual
20 changes: 4 additions & 16 deletions tests/test_fugue/test_fugue_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Testing out the fugue is_match functionality
"""
"""Test the fugue functionality with pandas."""
from io import StringIO
import pandas as pd
from ordered_set import OrderedSet
Expand All @@ -31,7 +28,7 @@
unq_columns,
)

from test_fuge_helpers import _compare_report
from test_fugue_helpers import _compare_report


def test_is_match_native(
Expand Down Expand Up @@ -81,9 +78,6 @@ def test_is_match_native(
)





def test_doc_case():
data1 = """acct_id,dollar_amt,name,float_fld,date_fld
10000001234,123.45,George Maharis,14530.1555,2017-01-01
Expand Down Expand Up @@ -115,6 +109,7 @@ def test_doc_case():
parallelism=2,
)


def test_report_pandas(
simple_diff_df1,
simple_diff_df2,
Expand Down Expand Up @@ -146,6 +141,7 @@ def test_report_pandas(
a = report(large_diff_df1, large_diff_df2, "x", parallelism=2)
_compare_report(comp.report(), a, truncate=True)


def test_unique_columns_native(ref_df):
df1 = ref_df[0]
df1_copy = ref_df[1]
Expand All @@ -159,9 +155,6 @@ def test_unique_columns_native(ref_df):
assert unq_columns(df3, df2) == OrderedSet(["c"])





def test_intersect_columns_native(ref_df):
df1 = ref_df[0]
df1_copy = ref_df[1]
Expand All @@ -175,8 +168,6 @@ def test_intersect_columns_native(ref_df):
assert intersect_columns(df3, df2) == OrderedSet()




def test_all_columns_match_native(ref_df):
df1 = ref_df[0]
df1_copy = ref_df[1]
Expand All @@ -190,8 +181,6 @@ def test_all_columns_match_native(ref_df):
assert all_columns_match(df3, df2) is False




def test_all_rows_overlap_native(
ref_df,
shuffle_df,
Expand All @@ -203,4 +192,3 @@ def test_all_rows_overlap_native(
# Fugue
assert all_rows_overlap(ref_df[0], shuffle_df, join_columns="a", parallelism=2)
assert not all_rows_overlap(ref_df[0], ref_df[4], join_columns="a", parallelism=2)

26 changes: 21 additions & 5 deletions tests/test_fugue/test_fugue_polars.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
#
# Copyright 2020 Capital One Services, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test fugue and polars."""
import pytest
from ordered_set import OrderedSet
from pytest import raises
Expand All @@ -9,8 +24,10 @@
is_match,
unq_columns,
)

pl = pytest.importorskip("polars")


def test_is_match_polars(
ref_df,
shuffle_df,
Expand All @@ -19,7 +36,6 @@ def test_is_match_polars(
space_df,
upper_col_df,
):

rdf = pl.from_pandas(ref_df[0])

assert is_match(rdf, shuffle_df, join_columns="a")
Expand All @@ -39,8 +55,8 @@ def test_is_match_polars(
with raises(AssertionError):
is_match(rdf, upper_col_df, join_columns="a", cast_column_names_lower=False)

def test_unique_columns_polars(ref_df):

def test_unique_columns_polars(ref_df):
df1 = ref_df[0]
df1_copy = ref_df[1]
df2 = ref_df[2]
Expand All @@ -57,8 +73,8 @@ def test_unique_columns_polars(ref_df):
assert unq_columns(pdf1_copy, pdf1) == OrderedSet()
assert unq_columns(pdf3, pdf2) == OrderedSet(["c"])

def test_intersect_columns_polars(ref_df):

def test_intersect_columns_polars(ref_df):
df1 = ref_df[0]
df1_copy = ref_df[1]
df2 = ref_df[2]
Expand All @@ -75,8 +91,8 @@ def test_intersect_columns_polars(ref_df):
assert intersect_columns(pdf1_copy, pdf1) == OrderedSet(["a", "b", "c"])
assert intersect_columns(pdf3, pdf2) == OrderedSet()

def test_all_columns_match_polars(ref_df):

def test_all_columns_match_polars(ref_df):
df1 = ref_df[0]
df1_copy = ref_df[1]
df2 = ref_df[2]
Expand All @@ -93,11 +109,11 @@ def test_all_columns_match_polars(ref_df):
assert all_columns_match(df1_copy, df1) is True
assert all_columns_match(df3, df2) is False


def test_all_rows_overlap_polars(
ref_df,
shuffle_df,
):

rdf = pl.from_pandas(ref_df[0])
rdf_copy = pl.from_pandas(ref_df[0].copy())
rdf4 = pl.from_pandas(ref_df[4])
Expand Down
18 changes: 17 additions & 1 deletion tests/test_fugue/test_fugue_spark.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
#
# Copyright 2020 Capital One Services, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test fugue and spark."""
import pytest
from datacompy import (
Compare,
Expand All @@ -11,10 +26,11 @@
from ordered_set import OrderedSet
from pytest import raises

from test_fuge_helpers import _compare_report
from test_fugue_helpers import _compare_report

pyspark = pytest.importorskip("pyspark")


def test_is_match_spark(
spark_session,
ref_df,
Expand Down

0 comments on commit e512803

Please sign in to comment.