Skip to content

Commit

Permalink
Comparison row count check secondary datasource filter fix
Browse files Browse the repository at this point in the history
  • Loading branch information
asantoz committed Oct 14, 2024
1 parent 8586ed4 commit 609949d
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 0 deletions.
7 changes: 7 additions & 0 deletions soda/core/soda/execution/check/row_count_comparison_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def __init__(
)

data_source_scan = self.data_source_scan
data_source_scan_cfg = self.data_source_scan.data_source_scan_cfg
scan = data_source_scan.scan

row_count_comparison_check_cfg: RowCountComparisonCheckCfg = self.check_cfg
Expand All @@ -38,6 +39,12 @@ def __init__(
other_table: Table = data_source_scan.get_or_create_table(row_count_comparison_check_cfg.other_table_name)
self.other_partition = other_table.get_or_create_partition(row_count_comparison_check_cfg.other_partition_name)

# Only look up and attach a partition cfg when an other-partition name was configured; otherwise skip this setup
if row_count_comparison_check_cfg.other_partition_name:
other_table_cfg = data_source_scan_cfg.get_or_create_table_cfg(row_count_comparison_check_cfg.other_table_name)
other_partition_cfg = other_table_cfg.find_partition(row_count_comparison_check_cfg.location.file_path, row_count_comparison_check_cfg.other_partition_name)
self.other_partition.set_partition_cfg(other_partition_cfg)

self.metrics["row_count"] = self.data_source_scan.resolve_metric(
NumericQueryMetric(
data_source_scan=self.data_source_scan,
Expand Down
30 changes: 30 additions & 0 deletions soda/core/tests/data_source/test_row_count_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,33 @@ def test_row_count_comparison_cross_data_source(data_source_fixture: DataSourceF
scan.execute()

scan.assert_all_checks_pass()

def test_row_count_comparison_cross_data_source_with_filter(data_source_fixture: DataSourceFixture):
    """Run a cross data source row-count comparison where both sides are filtered.

    This does not really create two connections or test cross data source
    filtering end to end; that is handled in integration tests. It only
    exercises syntax parsing and check execution.
    """
    customers_table_name = data_source_fixture.ensure_test_table(customers_test_table)
    rawcustomers_table_name = data_source_fixture.ensure_test_table(raw_customers_test_table)

    # Reuse the same data source name, so the "other" data source is the one
    # the fixture is already connected to.
    other_data_source_name = data_source_fixture.data_source.data_source_name

    scan = data_source_fixture.create_test_scan()
    # One named filter per table: [daily] on the checked table and [daily-ref]
    # on the table it is compared against. Each `where` must be nested under
    # its `filter` key to be attached to that filter.
    scan.add_sodacl_yaml_str(
        f"""
          filter {customers_table_name} [daily]:
            where: cst_size IS NULL
          filter {rawcustomers_table_name} [daily-ref]:
            where: cst_size IS NULL
          checks for {customers_table_name} [daily]:
            - row_count same as {rawcustomers_table_name} daily-ref in {other_data_source_name}
        """
    )

    scan.execute()

    scan.assert_all_checks_pass()

0 comments on commit 609949d

Please sign in to comment.