From 930e0386d7e546eef159f1307cfe789daadf3ed4 Mon Sep 17 00:00:00 2001 From: Faisal Date: Tue, 12 Mar 2024 11:10:29 -0300 Subject: [PATCH] bug fix when all columns match but no rows match (#277) * fixes #276 * bump version --- datacompy/__init__.py | 2 +- datacompy/spark.py | 36 ++++++++++++++++++++++++------------ 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/datacompy/__init__.py b/datacompy/__init__.py index 2231c88f..3e5cb57b 100644 --- a/datacompy/__init__.py +++ b/datacompy/__init__.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.11.0" +__version__ = "0.11.1" from datacompy.core import * from datacompy.fugue import ( diff --git a/datacompy/spark.py b/datacompy/spark.py index 45fe3419..f27d2e58 100644 --- a/datacompy/spark.py +++ b/datacompy/spark.py @@ -722,18 +722,30 @@ def _print_row_matches_by_column(self, myfile: TextIO) -> None: for key in self.columns_match_dict if self.columns_match_dict[key][MatchType.MISMATCH.value] } - columns_fully_matching = { - key: self.columns_match_dict[key] - for key in self.columns_match_dict - if sum(self.columns_match_dict[key]) - == self.columns_match_dict[key][MatchType.MATCH.value] - } - columns_with_any_diffs = { - key: self.columns_match_dict[key] - for key in self.columns_match_dict - if sum(self.columns_match_dict[key]) - != self.columns_match_dict[key][MatchType.MATCH.value] - } + + # corner case: when all columns match but no rows match + # issue: #276 + try: + columns_fully_matching = { + key: self.columns_match_dict[key] + for key in self.columns_match_dict + if sum(self.columns_match_dict[key]) + == self.columns_match_dict[key][MatchType.MATCH.value] + } + except TypeError: + columns_fully_matching = {} + + try: + columns_with_any_diffs = { + key: self.columns_match_dict[key] + for key in self.columns_match_dict + if sum(self.columns_match_dict[key]) + != self.columns_match_dict[key][MatchType.MATCH.value] + } + except TypeError: + columns_with_any_diffs = {} + # + base_types = {x[0]: x[1] for x in self.base_df.dtypes} compare_types = {x[0]: x[1] for x in self.compare_df.dtypes}