Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release 2.7.0 #16

Merged
merged 38 commits into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
8f38d9a
TG-734: [UI] Create a Cancel button to kill processes
alex-datakitchen Jun 7, 2024
4b03592
Merge branch 'alex/TG-734' into 'enterprise'
cbloche Jun 10, 2024
9d62573
feat(test types): new multi-table test types, log viewer, and assorte…
cbloche Jun 11, 2024
29354a0
Merge branch 'chip/aggregate_tests_20240610' into 'enterprise'
alex-datakitchen Jun 11, 2024
7f2a375
Fixed linter issues
cbloche Jun 11, 2024
1ef612d
fix(mssql): fixes to distribution shift test
cbloche Jun 12, 2024
ea0fd85
Merge branch 'chip/aggregate_tests_20240610' into 'enterprise'
Jun 12, 2024
2128295
Merge branch 'main' into 'enterprise'
aarthy-dk Jun 14, 2024
8220bda
release: 2.1.4 -> 2.1.8
luis-dk May 31, 2024
ff4b5e5
Update contributors file
aarthy-dk Jun 4, 2024
e6db447
Refactor log system
alex-datakitchen Jun 14, 2024
b9537f3
Merge branch 'enterprise' into 'alex/TG-735'
alex-datakitchen Jun 14, 2024
8e50750
Merge branch 'alex/TG-735' into 'enterprise'
Jun 17, 2024
555194e
fix(ui): unset bg color of detail values on dark mode
luis-dk Jun 17, 2024
ea2b15d
Merge branch 'fix/dark-theme' into 'enterprise'
Jun 17, 2024
1a2df94
Fixed label, help in Test Definitions
cbloche Jun 17, 2024
bc08655
Update file functional_datatype.sql
cbloche Jun 18, 2024
67ab3f4
Merge branch 'chip/test_def_fix_20240617' into 'enterprise'
Jun 18, 2024
8d10315
Re-enable incremental script testing
alex-datakitchen Jun 19, 2024
3af847b
Merge branch 'TG-725' into 'enterprise'
Jun 20, 2024
0801048
wildcard % doubled after quick-start
alex-datakitchen Jun 20, 2024
d530e4a
Merge branch 'alex/TG-745' into 'enterprise'
Jun 20, 2024
5999c81
[UI] [OPEN] Create QC Title is showing up twice
alex-datakitchen Jun 21, 2024
06e9ae3
Merge branch 'alex/TG-744-full-header' into 'enterprise'
alex-datakitchen Jun 24, 2024
9ae7950
Improved profiling functionality, test definition uniqueness
cbloche Jul 16, 2024
0de8355
misc(ui): change wording from profiling anomalies to hygiene issues i…
cbloche Jul 16, 2024
ed6babe
fix(dbupgrade): add missing index
cbloche Jul 16, 2024
6af9d35
fix(dbupgrade): renumbered upgrade script
cbloche Jul 16, 2024
fbadf85
add logs to functional tests
alex-datakitchen Jul 17, 2024
c8f07e9
add logs to functional tests. changed
alex-datakitchen Jul 17, 2024
5d3e22c
misc: fix ci/cd
alex-datakitchen Jul 17, 2024
711e371
Merge branch 'chip/structural_enhancements_20240716' into 'enterprise'
Jul 17, 2024
17c3840
Merge branch 'main' into 'enterprise'
alex-datakitchen Jul 17, 2024
f8a621c
#13: Support KeyPair Authentication for Snowflake
alex-datakitchen Jul 18, 2024
6e63012
Merge branch 'alex/snowflake-key-value-pair' into 'enterprise'
Jul 18, 2024
5b9931c
feat(cli/ui): scan for personally identifying information (pii)
cbloche Jul 21, 2024
52696d3
Merge branch 'chip/pii_scan_20240721' into 'enterprise'
Jul 22, 2024
d9ca130
Merge branch 'main' into 'enterprise'
aarthy-dk Jul 25, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ RUN TG_METADATA_DB_USER=- TG_METADATA_DB_PASSWORD=- TG_METADATA_DB_HOST=- TG_MET
ARG TESTGEN_VERSION
ENV TESTGEN_VERSION=v$TESTGEN_VERSION

ENV STREAMLIT_SERVER_MAX_UPLOAD_SIZE=2

WORKDIR /dk

ENTRYPOINT ["testgen"]
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ dependencies = [
"beautifulsoup4==4.12.3",
"trino==0.327.0",
"xlsxwriter==3.2.0",
"psutil==5.9.8",
"concurrent_log_handler==0.9.25",
"cryptography==42.0.8",
]

[project.optional-dependencies]
Expand Down
8 changes: 4 additions & 4 deletions testgen/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
)
from testgen.utils import plugins

LOG = logging.getLogger("testgen.cli")
LOG = logging.getLogger("testgen")


@dataclass
Expand All @@ -73,9 +73,9 @@ class Configuration:
@click.pass_context
def cli(ctx: Context, verbose: bool):
if verbose:
configure_logging(level=logging.INFO, log_to_file=settings.LOG_TO_FILE)
configure_logging(level=logging.DEBUG)
else:
configure_logging(level=logging.WARNING, log_to_file=settings.LOG_TO_FILE)
configure_logging(level=logging.INFO)

ctx.obj = Configuration(verbose=verbose)
status_ok, message = docker_service.check_basic_configuration()
Expand Down Expand Up @@ -714,7 +714,7 @@ def run(debug: bool):
)

status_code: int = -1
logger = logging.getLogger("testgen.ui")
logger = logging.getLogger("testgen")
stderr: typing.TextIO = typing.cast(typing.TextIO, logs.LogPipe(logger, logging.INFO))
stdout: typing.TextIO = typing.cast(typing.TextIO, logs.LogPipe(logger, logging.INFO))

Expand Down
68 changes: 35 additions & 33 deletions testgen/commands/queries/execute_tests_query.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,6 @@
import typing

from testgen.common import CleanSQL, date_service, read_template_sql_file


def add_quote_to_identifiers(strInput):
    """Double-quote the identifiers in a comma-separated list that need quoting.

    The input is split on commas and each piece is stripped of surrounding
    whitespace. A piece is wrapped in double quotes when it contains an
    uppercase or whitespace character, or when it matches (case-insensitively)
    one of the listed SQL keywords. Pieces that already start and end with a
    double quote are passed through untouched.

    Args:
        strInput: Comma-separated identifier list, e.g. ``"id, First Name"``.

    Returns:
        str: The processed identifiers joined with ``", "``.
    """
    # NOTE: In future we might have to expand the list of keywords
    keywords = {"select", "from", "where", "order", "by", "having"}

    quoted_values = []
    for raw_value in strInput.split(","):
        value = raw_value.strip()
        if value.startswith('"') and value.endswith('"'):
            # Already quoted by the caller; leave as-is.
            quoted_values.append(value)
            continue

        # The keyword test does not depend on the character being inspected,
        # so it is evaluated once per identifier instead of once per character.
        needs_quotes = value.lower() in keywords or any(c.isupper() or c.isspace() for c in value)
        quoted_values.append(f'"{value}"' if needs_quotes else value)
    return ", ".join(quoted_values)
from testgen.common import AddQuotesToIdentifierCSV, CleanSQL, ConcatColumnList, date_service, read_template_sql_file


class CTestExecutionSQL:
Expand All @@ -32,8 +10,9 @@ class CTestExecutionSQL:
test_suite = ""
test_run_id = ""
exception_message = ""
process_id = ""

# Test Set Parameters
# Test Group Parameters
dctTestParms: typing.ClassVar = {}
sum_columns = ""
match_sum_columns = ""
Expand All @@ -46,18 +25,31 @@ def __init__(self, strProjectCode, strFlavor, strTestSuite, minutes_offset=0):
self.today = date_service.get_now_as_string_with_offset(minutes_offset)
self.minutes_offset = minutes_offset

def _AssembleDisplayParameters(self):
    """Build a one-line display string of the populated test parameters.

    Walks a fixed, ordered list of parameter names. Each name that is present
    in ``self.dctTestParms`` with a value other than ``None`` or ``""`` is
    rendered as ``name=value``. The entries are joined with ``"; "`` and every
    single quote in the result is replaced by a backtick.

    Returns:
        str: The assembled string; empty when no listed parameter is populated.
    """
    display_keys = (
        "column_name", "skip_errors", "baseline_ct", "baseline_unique_ct", "baseline_value",
        "baseline_value_ct", "baseline_sum", "baseline_avg", "baseline_sd", "subset_condition",
        "groupby_names", "having_condition", "window_date_column", "window_days",
        "match_column_names", "match_subset_condition", "match_schema_name", "match_table_name",
        "match_groupby_names", "match_having_condition",
    )

    entries = []
    for key in display_keys:
        # Single lookup under one key: the membership test and the value read
        # can no longer disagree (previously `key.lower()` vs `key`).
        value = self.dctTestParms.get(key)
        if value not in (None, ""):
            entries.append(f"{key}={value}")

    return "; ".join(entries).replace("'", "`")

def _ReplaceParms(self, strInputString: str):
strInputString = strInputString.replace("{PROJECT_CODE}", self.project_code)
strInputString = strInputString.replace("{TEST_SUITE}", self.test_suite)
strInputString = strInputString.replace("{SQL_FLAVOR}", self.flavor)
strInputString = strInputString.replace("{TEST_RUN_ID}", self.test_run_id)
strInputString = strInputString.replace("{INPUT_PARAMETERS}", self._AssembleDisplayParameters())

strInputString = strInputString.replace("{RUN_DATE}", self.run_date)
strInputString = strInputString.replace("{SUM_COLUMNS}", self.sum_columns)
strInputString = strInputString.replace("{MATCH_SUM_COLUMNS}", self.match_sum_columns)
strInputString = strInputString.replace("{MULTI_COLUMN_ERROR_CONDITION}", self.multi_column_error_condition)
strInputString = strInputString.replace("{EXCEPTION_MESSAGE}", self.exception_message)
strInputString = strInputString.replace("{START_TIME}", self.today)
strInputString = strInputString.replace("{PROCESS_ID}", str(self.process_id))
strInputString = strInputString.replace(
"{NOW}", date_service.get_now_as_string_with_offset(self.minutes_offset)
)
Expand All @@ -67,21 +59,32 @@ def _ReplaceParms(self, strInputString: str):
# "COLUMN_NAMES",
# "COL_NAME",
# "COL_NAMES",
"MATCH_COLUMN_NAMES",
"MATCH_GROUPBY_NAMES",
# "MATCH_COLUMN_NAMES",
# "MATCH_GROUPBY_NAMES",
# "MATCH_SUM_COLUMNS",
]

for parm, value in self.dctTestParms.items():
if value:
if parm.upper() in column_designators:
strInputString = strInputString.replace("{" + parm.upper() + "}", add_quote_to_identifiers(value))
strInputString = strInputString.replace("{" + parm.upper() + "}", AddQuotesToIdentifierCSV(value))
else:
strInputString = strInputString.replace("{" + parm.upper() + "}", value)
else:
strInputString = strInputString.replace("{" + parm.upper() + "}", "")
if parm == "column_name":
strInputString = strInputString.replace("{COLUMN_NAME_DISPLAY}", value if value else "")
# Shows contents without double-quotes for display and aggregate expressions
strInputString = strInputString.replace("{COLUMN_NAME_NO_QUOTES}", value if value else "")
# Concatenates column list into single expression for relative entropy
str_value = ConcatColumnList(value, "<NULL>")
strInputString = strInputString.replace("{CONCAT_COLUMNS}", str_value if str_value else "")
if parm == "match_groupby_names":
# Concatenates column list into single expression for relative entropy
str_value = ConcatColumnList(value, "<NULL>")
strInputString = strInputString.replace("{CONCAT_MATCH_GROUPBY}", str_value if str_value else "")
if parm == "subset_condition":
strInputString = strInputString.replace("{SUBSET_DISPLAY}", value.replace("'", "''") if value else "")


# Adding escape character where ':' is referenced
strInputString = strInputString.replace(":", "\\:")
Expand Down Expand Up @@ -140,15 +143,14 @@ def _ConstructAggregateMatchParms(self):
self.list_multi_column_error_condition = [i + " < 0" for i in cols]
self.multi_column_error_condition = " or ".join(self.list_multi_column_error_condition)


def GetTestQuery(self, booClean: bool):
strTestType = self.dctTestParms["test_type"]
strTemplate = self.dctTestParms["template_name"]

if strTemplate == "":
raise ValueError(f"No query template assigned to test_type {strTestType}")

if strTestType in {"AGG MATCH NO DROPS", "AGG MATCH SAME", "AGG MATCH NUM INCR"}:
self._ConstructAggregateMatchParms()
strQ = self._GetTestQueryFromTemplate(strTemplate)
# Final replace to cover parm within CUSTOM_QUERY parm
strQ = strQ.replace("{DATA_SCHEMA}", self.dctTestParms["schema_name"])
Expand Down
6 changes: 4 additions & 2 deletions testgen/commands/queries/generate_tests_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from testgen.common import CleanSQL, date_service, get_template_files, read_template_sql_file

LOG = logging.getLogger("testgen.cli")
LOG = logging.getLogger("testgen")


class CDeriveTestsSQL:
Expand All @@ -13,6 +13,7 @@ class CDeriveTestsSQL:
table_groups_id = ""
data_schema = ""
test_suite = ""
test_suite_id = ""
generation_set = ""
as_of_date = ""
sql_flavor = ""
Expand All @@ -38,6 +39,7 @@ def ReplaceParms(self, strInputString):
strInputString = strInputString.replace("{TABLE_GROUPS_ID}", self.table_groups_id)
strInputString = strInputString.replace("{RUN_DATE}", self.run_date)
strInputString = strInputString.replace("{TEST_SUITE}", self.test_suite)
strInputString = strInputString.replace("{TEST_SUITE_ID}", self.test_suite_id)
strInputString = strInputString.replace("{GENERATION_SET}", self.generation_set)
strInputString = strInputString.replace("{AS_OF_DATE}", self.as_of_date)
strInputString = strInputString.replace("{DATA_SCHEMA}", self.data_schema)
Expand Down Expand Up @@ -75,7 +77,7 @@ def GetTestDerivationQueriesAsList(self, booClean):
lstTemplate = [CleanSQL(q) for q in lstTemplate]

if len(lstQueries) == 0:
LOG.warning("No test templates were found")
LOG.warning("No funny CAT test generation templates were found")

return lstTemplate

Expand Down
8 changes: 8 additions & 0 deletions testgen/commands/queries/profiling_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ class CProfilingSQL:
sampling_table = ""
sample_ratio = ""

process_id = None

contingency_max_values = "4"
contingency_columns = ""

Expand Down Expand Up @@ -95,6 +97,7 @@ def ReplaceParms(self, strInputString):
strInputString = strInputString.replace("{PARM_MAX_PATTERN_LENGTH}", str(self.parm_max_pattern_length))
strInputString = strInputString.replace("{CONTINGENCY_COLUMNS}", self.contingency_columns)
strInputString = strInputString.replace("{CONTINGENCY_MAX_VALUES}", self.contingency_max_values)
strInputString = strInputString.replace("{PROCESS_ID}", str(self.process_id))

return strInputString

Expand Down Expand Up @@ -133,6 +136,11 @@ def GetFunctionalTableTypeUpdateQuery(self):
strQ = self.ReplaceParms(read_template_sql_file("functional_tabletype_update.sql", sub_directory="profiling"))
return strQ

def GetPIIFlagUpdateQuery(self):
    """Return the PII-flag update SQL with this instance's parameters applied.

    Reads the ``pii_flag.sql`` template from the ``profiling`` sub-directory
    and passes it through ``self.ReplaceParms``.
    """
    # Runs on DK Postgres Server
    pii_template = read_template_sql_file("pii_flag.sql", sub_directory="profiling")
    return self.ReplaceParms(pii_template)

def GetAnomalyRefreshQuery(self):
# Runs on DK Postgres Server
strQ = self.ReplaceParms(read_template_sql_file("refresh_anomalies.sql", sub_directory="profiling"))
Expand Down
5 changes: 4 additions & 1 deletion testgen/commands/run_execute_cat_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
WriteListToDB,
)

LOG = logging.getLogger("testgen.cli")
LOG = logging.getLogger("testgen")


def RetrieveTargetTables(clsCATExecute):
Expand Down Expand Up @@ -95,6 +95,9 @@ def run_cat_test_queries(
dctParms["sql_flavor"],
dctParms["url"],
dctParms["connect_by_url"],
dctParms["connect_by_key"],
dctParms["private_key"],
dctParms["private_key_passphrase"],
"PROJECT",
)

Expand Down
40 changes: 26 additions & 14 deletions testgen/commands/run_execute_tests.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import logging
import subprocess
import threading
import uuid

import testgen.common.process_service as process_service
from testgen import settings
from testgen.commands.queries.execute_tests_query import CTestExecutionSQL
from testgen.common import (
AssignConnectParms,
Expand All @@ -17,7 +20,7 @@
from .run_execute_cat_tests import run_cat_test_queries
from .run_test_parameter_validation import run_parameter_validation_queries

LOG = logging.getLogger("testgen.cli")
LOG = logging.getLogger("testgen")


def run_test_queries(strTestRunID, strTestTime, strProjectCode, strTestSuite, minutes_offset=0, spinner=None):
Expand All @@ -41,6 +44,9 @@ def run_test_queries(strTestRunID, strTestTime, strProjectCode, strTestSuite, mi
dctParms["sql_flavor"],
dctParms["url"],
dctParms["connect_by_url"],
dctParms["connect_by_key"],
dctParms["private_key"],
dctParms["private_key_passphrase"],
"PROJECT",
)

Expand All @@ -49,6 +55,7 @@ def run_test_queries(strTestRunID, strTestTime, strProjectCode, strTestSuite, mi
clsExecute = CTestExecutionSQL(strProjectCode, dctParms["sql_flavor"], strTestSuite, minutes_offset)
clsExecute.run_date = strTestTime
clsExecute.test_run_id = strTestRunID
clsExecute.process_id = process_service.get_current_process_id()
booClean = False

# Add a record in Test Run table for the new Test Run
Expand Down Expand Up @@ -89,7 +96,7 @@ def run_test_queries(strTestRunID, strTestTime, strProjectCode, strTestSuite, mi
if intErrors > 0:
booErrors = True
error_msg = (
f"Errors were encountered executing aggregate tests. ({intErrors} errors occurred.) "
f"Errors were encountered executing Referential Tests. ({intErrors} errors occurred.) "
"Please check log. "
)
LOG.warning(error_msg)
Expand All @@ -109,18 +116,23 @@ def run_test_queries(strTestRunID, strTestTime, strProjectCode, strTestSuite, mi
return booErrors, error_msg


def run_execution_steps_in_background(strProjectCode, strTestSuite, minutes_offset=0):
    """Start ``run_execution_steps`` on a new thread and return immediately.

    Logs the start, calls ``empty_cache()``, then launches a thread whose
    target receives the project code, test suite and minutes offset.
    The thread is started but not joined.
    """
    LOG.info(f"Starting run_execution_steps_in_background against test suite: {strTestSuite}")
    empty_cache()
    step_args = (strProjectCode, strTestSuite, minutes_offset)
    worker = threading.Thread(target=run_execution_steps, args=step_args)
    worker.start()
def run_execution_steps_in_background(project_code, test_suite):
    """Kick off a test run for ``test_suite`` without blocking the caller.

    When ``settings.IS_DEBUG`` is falsy, the run is launched as a separate
    ``testgen run-tests`` process via ``subprocess.Popen``. Otherwise the
    cache is emptied and ``run_execution_steps`` is started on a new thread
    in the current process. Neither the process nor the thread is waited on.
    """
    msg = f"Starting run_execution_steps_in_background against test suite: {test_suite}"

    if not settings.IS_DEBUG:
        LOG.info(msg)
        command = ["testgen", "run-tests", "--project-key", project_code, "--test-suite-key", test_suite]
        subprocess.Popen(command)  # NOQA S603
        return

    LOG.info(msg + ". Running in debug mode (new thread instead of new process).")
    empty_cache()
    worker = threading.Thread(target=run_execution_steps, args=(project_code, test_suite))
    worker.start()


def run_execution_steps(strProjectCode, strTestSuite, minutes_offset=0, spinner=None):
Expand Down
8 changes: 6 additions & 2 deletions testgen/commands/run_generate_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from testgen.commands.queries.generate_tests_query import CDeriveTestsSQL
from testgen.common import AssignConnectParms, RetrieveDBResultsToDictList, RetrieveTestGenParms, RunActionQueryList

LOG = logging.getLogger("testgen.cli")
LOG = logging.getLogger("testgen")


def run_test_gen_queries(strTableGroupsID, strTestSuite, strGenerationSet=None):
Expand All @@ -18,7 +18,7 @@ def run_test_gen_queries(strTableGroupsID, strTestSuite, strGenerationSet=None):
LOG.info("CurrentStep: Retrieving General Parameters for Test Suite " + strTestSuite)
dctParms = RetrieveTestGenParms(strTableGroupsID, strTestSuite)

# Set Project Connection Parms in db_bridgers from retrieved parms
# Set Project Connection Parms from retrieved parms
LOG.info("CurrentStep: Assigning Connection Parameters")
AssignConnectParms(
dctParms["project_code"],
Expand All @@ -31,12 +31,16 @@ def run_test_gen_queries(strTableGroupsID, strTestSuite, strGenerationSet=None):
dctParms["sql_flavor"],
dctParms["url"],
dctParms["connect_by_url"],
dctParms["connect_by_key"],
dctParms["private_key"],
dctParms["private_key_passphrase"],
"PROJECT",
)

# Set static parms
clsTests.project_code = dctParms["project_code"]
clsTests.test_suite = strTestSuite
clsTests.test_suite_id = dctParms["test_suite_id"]
clsTests.generation_set = strGenerationSet if strGenerationSet is not None else ""
clsTests.connection_id = str(dctParms["connection_id"])
clsTests.table_groups_id = strTableGroupsID
Expand Down
Loading
Loading