diff --git a/.github/workflows/publish-python-package.yml b/.github/workflows/publish-python-package.yml index 75b9a41e2..4ed9e1bf3 100644 --- a/.github/workflows/publish-python-package.yml +++ b/.github/workflows/publish-python-package.yml @@ -20,7 +20,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.11' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/test-python-package.yml b/.github/workflows/test-python-package.yml index fa84b3d3a..3c88e7211 100644 --- a/.github/workflows/test-python-package.yml +++ b/.github/workflows/test-python-package.yml @@ -16,7 +16,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8, 3.9, "3.10"] + python-version: [3.9, "3.10", "3.11"] steps: - uses: actions/checkout@v4 diff --git a/MANIFEST.in b/MANIFEST.in index 12480abd8..0ace6ebe9 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,5 @@ global-exclude .DS_Store +global-exclude */__pycache__/* include *.txt include CODEOWNERS diff --git a/dataprofiler/labelers/char_load_tf_model.py b/dataprofiler/labelers/char_load_tf_model.py index b168e9234..a4a44e03a 100644 --- a/dataprofiler/labelers/char_load_tf_model.py +++ b/dataprofiler/labelers/char_load_tf_model.py @@ -237,7 +237,8 @@ def _construct_model(self) -> None: model_loc = self._parameters["model_path"] self._model: tf.keras.Model = tf.keras.models.load_model(model_loc) - softmax_output_layer_name = self._model.outputs[0].name.split("/")[0] + self._model = tf.keras.Model(self._model.inputs, self._model.outputs) + softmax_output_layer_name = self._model.output_names[0] softmax_layer_ind = cast( int, labeler_utils.get_tf_layer_index_from_name( @@ -252,21 +253,28 @@ def _construct_model(self) -> None: num_labels, activation="softmax", name="softmax_output" )(self._model.layers[softmax_layer_ind - 1].output) - # Output the model into a .pb file for TensorFlow - argmax_layer = tf.keras.backend.argmax(new_softmax_layer) + # Add argmax layer to get labels directly as an output + argmax_layer = tf.keras.ops.argmax(new_softmax_layer, axis=2) argmax_outputs = [new_softmax_layer, argmax_layer] self._model = tf.keras.Model(self._model.inputs, argmax_outputs) + self._model = tf.keras.Model(self._model.inputs, self._model.outputs) # Compile the model w/ metrics - softmax_output_layer_name = self._model.outputs[0].name.split("/")[0] + softmax_output_layer_name = self._model.output_names[0] losses = {softmax_output_layer_name: "categorical_crossentropy"} # use f1 score metric f1_score_training = labeler_utils.F1Score( num_classes=num_labels, average="micro" ) - metrics = {softmax_output_layer_name: ["acc", f1_score_training]} + metrics = { + softmax_output_layer_name: [ + "categorical_crossentropy", + "acc", + f1_score_training, + ] + } self._model.compile(loss=losses, optimizer="adam", metrics=metrics) @@ -294,30 +302,33 @@ def _reconstruct_model(self) -> None: num_labels = self.num_labels default_ind = self.label_mapping[self._parameters["default_label"]] - # Remove the 2 output layers ('softmax', 'tf_op_layer_ArgMax') - for _ in range(2): - self._model.layers.pop() - # Add the final Softmax layer to the previous spot + # self._model.layers[-2] to skip: original softmax final_softmax_layer = tf.keras.layers.Dense( num_labels, activation="softmax", name="softmax_output" - )(self._model.layers[-4].output) + )(self._model.layers[-2].output) - # Output the model into a .pb file for TensorFlow - argmax_layer = tf.keras.backend.argmax(final_softmax_layer) + # Add argmax layer to get labels directly as an output + argmax_layer = tf.keras.ops.argmax(final_softmax_layer, axis=2) argmax_outputs = [final_softmax_layer, argmax_layer] self._model = tf.keras.Model(self._model.inputs, argmax_outputs) # Compile the model - softmax_output_layer_name = self._model.outputs[0].name.split("/")[0] + softmax_output_layer_name = self._model.output_names[0] losses = {softmax_output_layer_name: "categorical_crossentropy"} # use f1 score metric f1_score_training = labeler_utils.F1Score( num_classes=num_labels, average="micro" ) - metrics = {softmax_output_layer_name: ["acc", f1_score_training]} + metrics = { + softmax_output_layer_name: [ + "categorical_crossentropy", + "acc", + f1_score_training, + ] + } self._model.compile(loss=losses, optimizer="adam", metrics=metrics) @@ -370,7 +381,7 @@ def fit( f1_report: dict = {} self._model.reset_metrics() - softmax_output_layer_name = self._model.outputs[0].name.split("/")[0] + softmax_output_layer_name = self._model.output_names[0] start_time = time.time() batch_id = 0 diff --git a/dataprofiler/labelers/character_level_cnn_model.py b/dataprofiler/labelers/character_level_cnn_model.py index 3194a2616..2cbb7051a 100644 --- a/dataprofiler/labelers/character_level_cnn_model.py +++ b/dataprofiler/labelers/character_level_cnn_model.py @@ -74,6 +74,133 @@ def create_glove_char(n_dims: int, source_file: str = None) -> None: file.write(word + " " + " ".join(str(num) for num in embd) + "\n") +@tf.keras.utils.register_keras_serializable(package="CharacterLevelCnnModel") +class ThreshArgMaxLayer(tf.keras.layers.Layer): + """Keras layer applying a thresholded argmax.""" + + def __init__( + self, threshold_: float, num_labels_: int, default_ind: int = 1, *args, **kwargs + ) -> None: + """Apply a minimum threshold to the argmax value. + + When below this threshold the index will be the default. + + :param num_labels: number of entities + :type num_labels: int + :param threshold: default set to 0 so all confidences pass. + :type threshold: float + :param default_ind: default index + :type default_ind: int + :return: final argmax threshold layer for the model + :return : tensor containing argmax thresholded integers, labels out + :rtype: tf.Tensor + """ + super().__init__(*args, **kwargs) + self._threshold_ = threshold_ + self._num_labels_ = num_labels_ + self._default_ind = default_ind + thresh_init = tf.constant_initializer(threshold_) + self.thresh_vec = tf.Variable( + name="ThreshVec", + initial_value=thresh_init(shape=[num_labels_]), + trainable=False, + ) + + def get_config(self): + """Return a serializable config for saving the layer.""" + config = super().get_config().copy() + config.update( + { + "threshold_": self._threshold_, + "num_labels_": self._num_labels_, + "default_ind": self._default_ind, + } + ) + return config + + def call(self, argmax_layer: tf.Tensor, confidence_layer: tf.Tensor) -> tf.Tensor: + """Apply the threshold argmax to the input tensor.""" + threshold_at_argmax = tf.gather(self.thresh_vec, argmax_layer) + + confidence_max_layer = tf.keras.backend.max(confidence_layer, axis=2) + + # Check if the confidences meet the threshold minimum. + argmax_mask = tf.keras.backend.cast( + tf.keras.backend.greater_equal(confidence_max_layer, threshold_at_argmax), + dtype=argmax_layer.dtype, + ) + + # Create a vector the same size as the batch_size which + # represents the background label + bg_label_tf = tf.keras.backend.constant( + self._default_ind, dtype=argmax_layer.dtype + ) + + # Generate the final predicted output using the function: + final_predicted_layer = tf.add( + bg_label_tf, + tf.multiply(tf.subtract(argmax_layer, bg_label_tf), argmax_mask), + name="ThreshArgMax", + ) + # final_predicted_layer.set_shape(argmax_layer.shape) + return final_predicted_layer + + +@tf.keras.utils.register_keras_serializable(package="CharacterLevelCnnModel") +class EncodingLayer(tf.keras.layers.Layer): + """Encodes strings to integers.""" + + def __init__( + self, max_char_encoding_id: int, max_len: int, *args, **kwargs + ) -> None: + """ + Encode characters for the list of sentences. + + :param max_char_encoding_id: Maximum integer value for encoding the + input + :type max_char_encoding_id: int + :param max_len: Maximum char length in a sample + :type max_len: int + """ + super().__init__(*args, **kwargs) + self.max_char_encoding_id = max_char_encoding_id + self.max_len = max_len + + def get_config(self): + """Return a serializable config for saving the layer.""" + config = super().get_config().copy() + config.update( + { + "max_char_encoding_id": self.max_char_encoding_id, + "max_len": self.max_len, + } + ) + return config + + def call(self, input_str_tensor: tf.Tensor) -> tf.Tensor: + """ + Encode characters for the list of sentences. + + :param input_str_tensor: input list of sentences converted to tensor + :type input_str_tensor: tf.tensor + :return : tensor containing encoded list of input sentences + :rtype: tf.Tensor + """ + # convert characters to indices + input_str_flatten = tf.reshape(input_str_tensor, [-1]) + sentences_encode = tf.strings.unicode_decode( + input_str_flatten, input_encoding="UTF-8" + ) + sentences_encode = tf.add(tf.cast(1, tf.int32), sentences_encode) + sentences_encode = tf.math.minimum( + sentences_encode, self.max_char_encoding_id + 1 + ) + + # padding + sentences_encode_pad = sentences_encode.to_tensor(shape=[None, self.max_len]) + return sentences_encode_pad + + class CharacterLevelCnnModel(BaseTrainableModel, metaclass=AutoSubRegistrationMeta): """Class for training char data labeler.""" @@ -280,7 +407,7 @@ def save_to_disk(self, dirpath: str) -> None: labels_dirpath = os.path.join(dirpath, "label_mapping.json") with open(labels_dirpath, "w") as fp: json.dump(self.label_mapping, fp) - self._model.save(os.path.join(dirpath)) + self._model.save(os.path.join(dirpath, "model.keras")) @classmethod def load_from_disk(cls, dirpath: str) -> CharacterLevelCnnModel: @@ -301,15 +428,7 @@ def load_from_disk(cls, dirpath: str) -> CharacterLevelCnnModel: with open(labels_dirpath) as fp: label_mapping = json.load(fp) - # use f1 score metric - custom_objects = { - "F1Score": labeler_utils.F1Score( - num_classes=max(label_mapping.values()) + 1, average="micro" - ), - "CharacterLevelCnnModel": cls, - } - with tf.keras.utils.custom_object_scope(custom_objects): - tf_model = tf.keras.models.load_model(dirpath) + tf_model = tf.keras.models.load_model(os.path.join(dirpath, "model.keras")) loaded_model = cls(label_mapping, parameters) loaded_model._model = tf_model @@ -333,35 +452,6 @@ def load_from_disk(cls, dirpath: str) -> CharacterLevelCnnModel: ] return loaded_model - @staticmethod - def _char_encoding_layer( - input_str_tensor: tf.Tensor, max_char_encoding_id: int, max_len: int - ) -> tf.Tensor: - """ - Encode characters for the list of sentences. - - :param input_str_tensor: input list of sentences converted to tensor - :type input_str_tensor: tf.tensor - :param max_char_encoding_id: Maximum integer value for encoding the - input - :type max_char_encoding_id: int - :param max_len: Maximum char length in a sample - :type max_len: int - :return : tensor containing encoded list of input sentences - :rtype: tf.Tensor - """ - # convert characters to indices - input_str_flatten = tf.reshape(input_str_tensor, [-1]) - sentences_encode = tf.strings.unicode_decode( - input_str_flatten, input_encoding="UTF-8" - ) - sentences_encode = tf.add(tf.cast(1, tf.int32), sentences_encode) - sentences_encode = tf.math.minimum(sentences_encode, max_char_encoding_id + 1) - - # padding - sentences_encode_pad = sentences_encode.to_tensor(shape=[None, max_len]) - return sentences_encode_pad - @staticmethod def _argmax_threshold_layer( num_labels: int, threshold: float = 0.0, default_ind: int = 1 @@ -383,47 +473,7 @@ def _argmax_threshold_layer( """ # Initialize the thresholds vector variable and create the threshold # matrix. - class ThreshArgMaxLayer(tf.keras.layers.Layer): - def __init__(self, threshold_: float, num_labels_: int) -> None: - super().__init__() - thresh_init = tf.constant_initializer(threshold_) - self.thresh_vec = tf.Variable( - name="ThreshVec", - initial_value=thresh_init(shape=[num_labels_]), - trainable=False, - ) - - def call( - self, argmax_layer: tf.Tensor, confidence_layer: tf.Tensor - ) -> tf.Tensor: - threshold_at_argmax = tf.gather(self.thresh_vec, argmax_layer) - - confidence_max_layer = tf.keras.backend.max(confidence_layer, axis=2) - - # Check if the confidences meet the threshold minimum. - argmax_mask = tf.keras.backend.cast( - tf.keras.backend.greater_equal( - confidence_max_layer, threshold_at_argmax - ), - dtype=argmax_layer.dtype, - ) - - # Create a vector the same size as the batch_size which - # represents the background label - bg_label_tf = tf.keras.backend.constant( - default_ind, dtype=argmax_layer.dtype - ) - - # Generate the final predicted output using the function: - final_predicted_layer = tf.add( - bg_label_tf, - tf.multiply(tf.subtract(argmax_layer, bg_label_tf), argmax_mask), - name="ThreshArgMax", - ) - - return final_predicted_layer - - return ThreshArgMaxLayer(threshold, num_labels) + return ThreshArgMaxLayer(threshold, num_labels, default_ind) def _construct_model(self) -> None: """ @@ -449,17 +499,13 @@ def _construct_model(self) -> None: max_length = self._parameters["max_length"] max_char_encoding_id = self._parameters["max_char_encoding_id"] - # Encoding layer - def encoding_function(input_str: tf.Tensor) -> tf.Tensor: - char_in_vector = CharacterLevelCnnModel._char_encoding_layer( - input_str, max_char_encoding_id, max_length - ) - return char_in_vector - self._model.add(tf.keras.layers.Input(shape=(None,), dtype=tf.string)) self._model.add( - tf.keras.layers.Lambda(encoding_function, output_shape=tuple([max_length])) + EncodingLayer( + max_char_encoding_id=max_char_encoding_id, + max_len=max_length, + ), ) # Create a pre-trained weight matrix @@ -474,7 +520,6 @@ def encoding_function(input_str: tf.Tensor) -> tf.Tensor: ) embedding_dict = build_embd_dictionary(embed_file) - input_shape = tuple([max_length]) # Fill in the weight matrix: let pad and space be 0s for ascii_num in range(max_char_encoding_id): if chr(ascii_num) in embedding_dict: @@ -485,7 +530,6 @@ def encoding_function(input_str: tf.Tensor) -> tf.Tensor: max_char_encoding_id + 2, self._parameters["dim_embed"], weights=[embedding_matrix], - input_length=input_shape[0], trainable=True, ) ) @@ -502,8 +546,7 @@ def encoding_function(input_str: tf.Tensor) -> tf.Tensor: ) if self._parameters["dropout"]: self._model.add(tf.keras.layers.Dropout(self._parameters["dropout"])) - # Add batch normalization, set fused = True for compactness - self._model.add(tf.keras.layers.BatchNormalization(fused=False, scale=True)) + self._model.add(tf.keras.layers.BatchNormalization(scale=True)) # Add the fully connected layers for size in self._parameters["size_fc"]: @@ -514,29 +557,35 @@ def encoding_function(input_str: tf.Tensor) -> tf.Tensor: # Add the final Softmax layer self._model.add(tf.keras.layers.Dense(num_labels, activation="softmax")) - # Output the model into a .pb file for TensorFlow - argmax_layer = tf.keras.backend.argmax(self._model.output) + # Add argmax layer to get labels directly as an output + argmax_layer = tf.keras.ops.argmax(self._model.outputs[0], axis=2) # Create confidence layers - final_predicted_layer = CharacterLevelCnnModel._argmax_threshold_layer( - num_labels, threshold=0.0, default_ind=default_ind + final_predicted_layer = ThreshArgMaxLayer( + threshold_=0.0, num_labels_=num_labels, default_ind=default_ind ) argmax_outputs = self._model.outputs + [ argmax_layer, - final_predicted_layer(argmax_layer, self._model.output), + final_predicted_layer(argmax_layer, self._model.outputs[0]), ] self._model = tf.keras.Model(self._model.inputs, argmax_outputs) # Compile the model - softmax_output_layer_name = self._model.outputs[0].name.split("/")[0] + softmax_output_layer_name = self._model.output_names[0] losses = {softmax_output_layer_name: "categorical_crossentropy"} # use f1 score metric f1_score_training = labeler_utils.F1Score( num_classes=num_labels, average="micro" ) - metrics = {softmax_output_layer_name: ["acc", f1_score_training]} + metrics = { + softmax_output_layer_name: [ + "categorical_crossentropy", + "acc", + f1_score_training, + ] + } self._model.compile(loss=losses, optimizer="adam", metrics=metrics) @@ -564,22 +613,18 @@ def _reconstruct_model(self) -> None: num_labels = self.num_labels default_ind = self.label_mapping[self._parameters["default_label"]] - # Remove the 3 output layers (dense_2', 'tf_op_layer_ArgMax', - # 'thresh_arg_max_layer') - for _ in range(3): - self._model.layers.pop() - # Add the final Softmax layer to the previous spot + # self._model.layers[-3] to skip: thresh and original softmax final_softmax_layer = tf.keras.layers.Dense( num_labels, activation="softmax", name="dense_2" - )(self._model.layers[-4].output) + )(self._model.layers[-3].output) - # Output the model into a .pb file for TensorFlow - argmax_layer = tf.keras.backend.argmax(final_softmax_layer) + # Add argmax layer to get labels directly as an output + argmax_layer = tf.keras.ops.argmax(final_softmax_layer, axis=2) # Create confidence layers - final_predicted_layer = CharacterLevelCnnModel._argmax_threshold_layer( - num_labels, threshold=0.0, default_ind=default_ind + final_predicted_layer = ThreshArgMaxLayer( + threshold_=0.0, num_labels_=num_labels, default_ind=default_ind ) argmax_outputs = [final_softmax_layer] + [ @@ -589,14 +634,20 @@ def _reconstruct_model(self) -> None: self._model = tf.keras.Model(self._model.inputs, argmax_outputs) # Compile the model - softmax_output_layer_name = self._model.outputs[0].name.split("/")[0] + softmax_output_layer_name = self._model.output_names[0] losses = {softmax_output_layer_name: "categorical_crossentropy"} # use f1 score metric f1_score_training = labeler_utils.F1Score( num_classes=num_labels, average="micro" ) - metrics = {softmax_output_layer_name: ["acc", f1_score_training]} + metrics = { + softmax_output_layer_name: [ + "categorical_crossentropy", + "acc", + f1_score_training, + ] + } self._model.compile(loss=losses, optimizer="adam", metrics=metrics) self._epoch_id = 0 @@ -648,7 +699,7 @@ def fit( f1_report: dict = {} self._model.reset_metrics() - softmax_output_layer_name = self._model.outputs[0].name.split("/")[0] + softmax_output_layer_name = self._model.output_names[0] start_time = time.time() batch_id = 0 @@ -729,7 +780,9 @@ def _validate_training( for x_val, y_val in val_data: y_val_pred.append( self._model.predict( - x_val, batch_size=batch_size_test, verbose=verbose_keras + tf.convert_to_tensor(x_val), + batch_size=batch_size_test, + verbose=verbose_keras, )[1] ) y_val_test.append(np.argmax(y_val, axis=-1)) diff --git a/dataprofiler/labelers/data_labelers.py b/dataprofiler/labelers/data_labelers.py index 7172e7472..a6d9932b7 100644 --- a/dataprofiler/labelers/data_labelers.py +++ b/dataprofiler/labelers/data_labelers.py @@ -141,11 +141,11 @@ def load_from_library(cls, name: str, trainable: bool = False) -> BaseDataLabele :type trainable: bool :return: DataLabeler class """ + for labeler_name, labeler_class_obj in cls.labeler_classes.items(): + if name == labeler_name: + name = labeler_class_obj._default_model_loc if trainable: return TrainableDataLabeler.load_from_library(name) - for _, labeler_class_obj in cls.labeler_classes.items(): - if name in labeler_class_obj._default_model_loc: - return labeler_class_obj() return BaseDataLabeler.load_from_library(name) @classmethod diff --git a/dataprofiler/labelers/labeler_utils.py b/dataprofiler/labelers/labeler_utils.py index b6070ff72..3a24886f3 100644 --- a/dataprofiler/labelers/labeler_utils.py +++ b/dataprofiler/labelers/labeler_utils.py @@ -358,7 +358,7 @@ def __init__( def _zero_wt_init(name: str) -> tf.Variable: return self.add_weight( - name, shape=self.init_shape, initializer="zeros", dtype=self.dtype + name=name, shape=self.init_shape, initializer="zeros", dtype=self.dtype ) self.true_positives = _zero_wt_init("true_positives") @@ -435,11 +435,6 @@ def get_config(self) -> dict: base_config = super().get_config() return {**base_config, **config} - def reset_state(self) -> None: - """Reset state.""" - reset_value = tf.zeros(self.init_shape, dtype=self.dtype) - tf.keras.backend.batch_set_value([(v, reset_value) for v in self.variables]) - @protected_register_keras_serializable() class F1Score(FBetaScore): diff --git a/dataprofiler/tests/labelers/test_char_tf_load_model.py b/dataprofiler/tests/labelers/test_char_tf_load_model.py index fbfde0c49..c6d70f740 100644 --- a/dataprofiler/tests/labelers/test_char_tf_load_model.py +++ b/dataprofiler/tests/labelers/test_char_tf_load_model.py @@ -272,7 +272,7 @@ def test_fit_and_predict(self, *mocks): ) # predict after fitting on just the text - model.predict(data_gen[0][0]) + model.predict([data_gen[0][0]]) @mock.patch("os.makedirs", return_value=None) def test_validation_evaluate_and_classification_report(self, *mocks): diff --git a/dataprofiler/tests/labelers/test_character_level_cnn_model.py b/dataprofiler/tests/labelers/test_character_level_cnn_model.py index ad549cc53..e120a9754 100644 --- a/dataprofiler/tests/labelers/test_character_level_cnn_model.py +++ b/dataprofiler/tests/labelers/test_character_level_cnn_model.py @@ -9,7 +9,10 @@ import pkg_resources import tensorflow as tf -from dataprofiler.labelers.character_level_cnn_model import CharacterLevelCnnModel +from dataprofiler.labelers.character_level_cnn_model import ( + CharacterLevelCnnModel, + EncodingLayer, +) _file_dir = os.path.dirname(os.path.abspath(__file__)) _resource_labeler_dir = pkg_resources.resource_filename("resources", "labelers") @@ -272,7 +275,7 @@ def test_fit_and_predict_with_new_labels(self): ) # predict after fitting on just the text - cnn_model.predict(data_gen[0][0]) + cnn_model.predict([data_gen[0][0]]) def test_fit_and_predict_with_new_labels_set_via_method(self): # Initialize model @@ -301,7 +304,7 @@ def test_fit_and_predict_with_new_labels_set_via_method(self): history, f1, f1_report = cnn_model.fit(data_gen, cv_gen) # test predict on just the text - cnn_model.predict(data_gen[0][0]) + cnn_model.predict([data_gen[0][0]]) def test_validation(self): @@ -368,9 +371,8 @@ def test_input_encoding(self): max_char_encoding_id = 127 max_len = 10 - encode_output = cnn_model._char_encoding_layer( - input_str_tensor, max_char_encoding_id, max_len - ).numpy()[0] + encode_layer = EncodingLayer(max_char_encoding_id, max_len) + encode_output = encode_layer.call(input_str_tensor).numpy()[0] expected_output = [117, 102, 116, 117, 0, 0, 0, 0, 0, 0] self.assertCountEqual(encode_output, expected_output) @@ -464,7 +466,6 @@ def test_model_construct(self): "dense_1", "dropout_5", "dense_2", - "tf_op_layer_ArgMax", "thresh_arg_max_layer", ] model_layers = [layer.name for layer in cnn_model._model.layers] diff --git a/dataprofiler/tests/labelers/test_labeler_utils.py b/dataprofiler/tests/labelers/test_labeler_utils.py index f59a43e3f..c14fca54f 100644 --- a/dataprofiler/tests/labelers/test_labeler_utils.py +++ b/dataprofiler/tests/labelers/test_labeler_utils.py @@ -1,6 +1,6 @@ import logging +import tempfile import unittest -from unittest import mock import numpy as np import pandas as pd @@ -235,9 +235,7 @@ def test_verbose(self): self.assertIn("f1-score ", log_output) self.assertIn("F1 Score: ", log_output) - @mock.patch("dataprofiler.labelers.labeler_utils.classification_report") - @mock.patch("pandas.DataFrame") - def test_save_conf_mat(self, mock_dataframe, mock_report): + def test_save_conf_mat(self): # ideally mock out the actual contents written to file, but # would be difficult to get this completely worked out. @@ -248,28 +246,25 @@ def test_save_conf_mat(self, mock_dataframe, mock_report): [0, 1, 2], ] ) - expected_row_col_names = dict( - columns=["pred:PAD", "pred:UNKNOWN", "pred:OTHER"], - index=["true:PAD", "true:UNKNOWN", "true:OTHER"], - ) - mock_instance_df = mock.Mock(spec=pd.DataFrame)() - mock_dataframe.return_value = mock_instance_df - - # still omit bc confusion mat should include all despite omit - f1, f1_report = labeler_utils.evaluate_accuracy( - self.y_pred, - self.y_true, - self.num_labels, - self.reverse_label_mapping, - omitted_labels=["PAD"], - verbose=False, - confusion_matrix_file="test.csv", - ) + expected_columns = ["pred:PAD", "pred:UNKNOWN", "pred:OTHER"] + expected_index = ["true:PAD", "true:UNKNOWN", "true:OTHER"] - self.assertTrue((mock_dataframe.call_args[0][0] == expected_conf_mat).all()) - self.assertDictEqual(expected_row_col_names, mock_dataframe.call_args[1]) + with tempfile.NamedTemporaryFile() as tmpFile: + # still omit bc confusion mat should include all despite omit + f1, f1_report = labeler_utils.evaluate_accuracy( + self.y_pred, + self.y_true, + self.num_labels, + self.reverse_label_mapping, + omitted_labels=["PAD"], + verbose=False, + confusion_matrix_file=tmpFile.name, + ) - mock_instance_df.to_csv.assert_called() + df1 = pd.read_csv(tmpFile.name, index_col=0) + self.assertListEqual(list(df1.columns), expected_columns) + self.assertListEqual(list(df1.index), expected_index) + np.testing.assert_array_equal(df1.values, expected_conf_mat) class TestTFFunctions(unittest.TestCase): diff --git a/dataprofiler/version.py b/dataprofiler/version.py index 1136efae1..b41e1c451 100644 --- a/dataprofiler/version.py +++ b/dataprofiler/version.py @@ -1,7 +1,7 @@ """File contains the version number for the package.""" MAJOR = 0 -MINOR = 11 +MINOR = 12 MICRO = 0 POST = None # otherwise None diff --git a/requirements-dev.txt b/requirements-dev.txt index cff8f51a0..f6343283c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,6 +3,7 @@ black>=24.3.0 isort==5.12.0 pre-commit==2.19.0 tox==3.25.1 +tox-conda==0.10.2 types-setuptools==67.7.0.1 types-python-dateutil==2.8.19.12 types-requests==2.30.0.0 diff --git a/requirements-ml.txt b/requirements-ml.txt index ff525fec1..6da08b313 100644 --- a/requirements-ml.txt +++ b/requirements-ml.txt @@ -1,7 +1,7 @@ scikit-learn>=0.23.2 -keras>=2.4.3,<3.0.0 +keras>=3.0.0 rapidfuzz>=2.6.1 -tensorflow>=2.6.4,<2.15.0; sys.platform != 'darwin' -tensorflow>=2.6.4,<2.15.0; sys_platform == 'darwin' and platform_machine != 'arm64' -tensorflow-macos>=2.6.4,<2.15.0; sys_platform == 'darwin' and platform_machine == 'arm64' +tensorflow>=2.16.0; sys.platform != 'darwin' +tensorflow>=2.16.0; sys_platform == 'darwin' and platform_machine != 'arm64' +tensorflow-macos>=2.16.0; sys_platform == 'darwin' and platform_machine == 'arm64' tqdm>=4.0.0 diff --git a/requirements-test.txt b/requirements-test.txt index 6c981cf9c..725b23849 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,5 +1,5 @@ coverage>=5.0.1 -dask>=2.29.0,<2024.2.0 +dask[dask-expr,dataframe]>=2024.4.1 fsspec>=0.3.3 pytest>=6.0.1 pytest-cov>=2.8.1 diff --git a/requirements.txt b/requirements.txt index a45dc34ae..152b5eb36 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ python-dateutil>=2.7.5 pytz>=2020.1 pyarrow>=1.0.1 chardet>=3.0.4 -fastavro>=1.0.0.post1 +fastavro>=1.1.0 python-snappy>=0.5.4 charset-normalizer>=1.3.6 psutil>=4.0.0 diff --git a/resources/labelers/structured_model/keras_metadata.pb b/resources/labelers/structured_model/keras_metadata.pb deleted file mode 100644 index dcc84a213..000000000 --- a/resources/labelers/structured_model/keras_metadata.pb +++ /dev/null @@ -1,29 +0,0 @@ - -ã`root"_tf_keras_network*Á`{"name": "functional_1", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "class_name": "Functional", "config": {"name": "functional_1", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, null]}, "dtype": "string", "sparse": false, "ragged": false, "name": "input_1"}, "name": "input_1", "inbound_nodes": []}, {"class_name": "Lambda", "config": {"name": "lambda", "trainable": true, "dtype": "float32", "function": {"class_name": "__tuple__", "items": ["4wEAAAAAAAAAAgAAAAQAAAATAAAAcxIAAAB0AGoBfACIAIgBgwN9AXwBUwApAU4pAtoWQ2hhcmFj\ndGVyTGV2ZWxDbm5Nb2RlbNoUX2NoYXJfZW5jb2RpbmdfbGF5ZXIpAtoJaW5wdXRfc3RyWg5jaGFy\nX2luX3ZlY3RvcikC2hRtYXhfY2hhcl9lbmNvZGluZ19pZNoKbWF4X2xlbmd0aKkA+lMvaG9tZS91\nYnVudHUvbmV3LWRwL0RhdGFQcm9maWxlci9kYXRhcHJvZmlsZXIvbGFiZWxlcnMvY2hhcmFjdGVy\nX2xldmVsX2Nubl9tb2RlbC5wedoRZW5jb2RpbmdfZnVuY3Rpb25TAgAAcwYAAAAAAQQBCgE=\n", null, {"class_name": "__tuple__", "items": [127, 3400]}]}, "function_type": "lambda", "module": "dataprofiler.labelers.character_level_cnn_model", "output_shape": {"class_name": "__tuple__", "items": [3400]}, "output_shape_type": "raw", "output_shape_module": null, "arguments": {}}, "name": "lambda", "inbound_nodes": [[["input_1", 0, 0, {}]]]}, {"class_name": "Embedding", "config": {"name": "embedding", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 3400]}, "dtype": "float32", "input_dim": 129, "output_dim": 64, "embeddings_initializer": {"class_name": "RandomUniform", "config": {"minval": -0.05, "maxval": 0.05, "seed": null}}, "embeddings_regularizer": null, "activity_regularizer": null, "embeddings_constraint": null, "mask_zero": false, "input_length": 3400}, "name": "embedding", "inbound_nodes": [[["lambda", 0, 0, {}]]]}, {"class_name": "Conv1D", "config": {"name": "conv1d", "trainable": true, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [13]}, "strides": {"class_name": "__tuple__", "items": [1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "conv1d", "inbound_nodes": [[["embedding", 0, 0, {}]]]}, {"class_name": "Dropout", "config": {"name": "dropout", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "name": "dropout", "inbound_nodes": [[["conv1d", 0, 0, {}]]]}, {"class_name": "BatchNormalization", "config": {"name": "batch_normalization", "trainable": true, "dtype": "float32", "axis": [2], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "name": "batch_normalization", "inbound_nodes": [[["dropout", 0, 0, {}]]]}, {"class_name": "Conv1D", "config": {"name": "conv1d_1", "trainable": true, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [13]}, "strides": {"class_name": "__tuple__", "items": [1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "conv1d_1", "inbound_nodes": [[["batch_normalization", 0, 0, {}]]]}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "name": "dropout_1", "inbound_nodes": [[["conv1d_1", 0, 0, {}]]]}, {"class_name": "BatchNormalization", "config": {"name": "batch_normalization_1", "trainable": true, "dtype": "float32", "axis": [2], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "name": "batch_normalization_1", "inbound_nodes": [[["dropout_1", 0, 0, {}]]]}, {"class_name": "Conv1D", "config": {"name": "conv1d_2", "trainable": true, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [13]}, "strides": {"class_name": "__tuple__", "items": [1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "conv1d_2", "inbound_nodes": [[["batch_normalization_1", 0, 0, {}]]]}, {"class_name": "Dropout", "config": {"name": "dropout_2", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "name": "dropout_2", "inbound_nodes": [[["conv1d_2", 0, 0, {}]]]}, {"class_name": "BatchNormalization", "config": {"name": "batch_normalization_2", "trainable": true, "dtype": "float32", "axis": [2], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "name": "batch_normalization_2", "inbound_nodes": [[["dropout_2", 0, 0, {}]]]}, {"class_name": "Conv1D", "config": {"name": "conv1d_3", "trainable": true, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [13]}, "strides": {"class_name": "__tuple__", "items": [1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "conv1d_3", "inbound_nodes": [[["batch_normalization_2", 0, 0, {}]]]}, {"class_name": "Dropout", "config": {"name": "dropout_3", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "name": "dropout_3", "inbound_nodes": [[["conv1d_3", 0, 0, {}]]]}, {"class_name": "BatchNormalization", "config": {"name": "batch_normalization_3", "trainable": true, "dtype": "float32", "axis": [2], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "name": "batch_normalization_3", "inbound_nodes": [[["dropout_3", 0, 0, {}]]]}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 96, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense", "inbound_nodes": [[["batch_normalization_3", 0, 0, {}]]]}, {"class_name": "Dropout", "config": {"name": "dropout_4", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "name": "dropout_4", "inbound_nodes": [[["dense", 0, 0, {}]]]}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 96, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense_1", "inbound_nodes": [[["dropout_4", 0, 0, {}]]]}, {"class_name": "Dropout", "config": {"name": "dropout_5", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "name": "dropout_5", "inbound_nodes": [[["dense_1", 0, 0, {}]]]}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 24, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense_2", "inbound_nodes": [[["dropout_5", 0, 0, {}]]]}, {"class_name": "TensorFlowOpLayer", "config": {"name": "ArgMax", "trainable": true, "dtype": "float32", "node_def": {"name": "ArgMax", "op": "ArgMax", "input": ["dense_2/truediv", "ArgMax/dimension"], "attr": {"Tidx": {"type": "DT_INT32"}, "output_type": {"type": "DT_INT64"}, "T": {"type": "DT_FLOAT"}}}, "constants": {"1": -1}}, "name": "tf_op_layer_ArgMax", "inbound_nodes": [[["dense_2", 0, 0, {}]]]}, {"class_name": "ThreshArgMaxLayer", "config": {"layer was saved without config": true}, "name": "thresh_arg_max_layer", "inbound_nodes": [[["tf_op_layer_ArgMax", 0, 0, {"confidence_layer": ["dense_2", 0, 0]}]]]}], "input_layers": [["input_1", 0, 0]], "output_layers": [["dense_2", 0, 0], ["tf_op_layer_ArgMax", 0, 0], ["thresh_arg_max_layer", 0, 0]]}, "shared_object_id": 52, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, null]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}], "build_input_shape": {"class_name": "TensorShape", "items": [null, null]}, "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "string", "input_1"]}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "string", "input_1"]}, "keras_version": "2.6.0", "backend": "tensorflow", "model_config": {"class_name": "Functional"}, "training_config": {"loss": {"dense_2": "categorical_crossentropy"}, "metrics": [[{"class_name": "MeanMetricWrapper", "config": {"name": "acc", "dtype": "float32", "fn": "categorical_accuracy"}, "shared_object_id": 54}, {"class_name": "Custom>F1Score", "config": {"name": "dense_2_f1_score", "dtype": "float32", "num_classes": 24, "average": "micro", "threshold": null}, "shared_object_id": 55}], [null], [null]], "weighted_metrics": null, "loss_weights": null, "optimizer_config": {"class_name": "Adam", "config": {"name": "Adam", "learning_rate": 0.0010000000474974513, "decay": 0.0, "beta_1": 0.8999999761581421, "beta_2": 0.9990000128746033, "epsilon": 1e-07, "amsgrad": false}}}}2 -ú root.layer-0"_tf_keras_input_layer*Ê{"class_name": "InputLayer", "name": "input_1", "dtype": "string", "sparse": false, "ragged": false, "batch_input_shape": {"class_name": "__tuple__", "items": [null, null]}, "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, null]}, "dtype": "string", "sparse": false, "ragged": false, "name": "input_1"}}2 -Å root.layer-1"_tf_keras_layer*›{"name": "lambda", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Lambda", "config": {"name": "lambda", "trainable": true, "dtype": "float32", "function": {"class_name": "__tuple__", "items": ["4wEAAAAAAAAAAgAAAAQAAAATAAAAcxIAAAB0AGoBfACIAIgBgwN9AXwBUwApAU4pAtoWQ2hhcmFj\ndGVyTGV2ZWxDbm5Nb2RlbNoUX2NoYXJfZW5jb2RpbmdfbGF5ZXIpAtoJaW5wdXRfc3RyWg5jaGFy\nX2luX3ZlY3RvcikC2hRtYXhfY2hhcl9lbmNvZGluZ19pZNoKbWF4X2xlbmd0aKkA+lMvaG9tZS91\nYnVudHUvbmV3LWRwL0RhdGFQcm9maWxlci9kYXRhcHJvZmlsZXIvbGFiZWxlcnMvY2hhcmFjdGVy\nX2xldmVsX2Nubl9tb2RlbC5wedoRZW5jb2RpbmdfZnVuY3Rpb25TAgAAcwYAAAAAAQQBCgE=\n", null, {"class_name": "__tuple__", "items": [127, 3400]}]}, "function_type": "lambda", "module": "dataprofiler.labelers.character_level_cnn_model", "output_shape": {"class_name": "__tuple__", "items": [3400]}, "output_shape_type": "raw", "output_shape_module": null, "arguments": {}}, "inbound_nodes": [[["input_1", 0, 0, {}]]], "shared_object_id": 1}2 -‚root.layer_with_weights-0"_tf_keras_layer*Ë{"name": "embedding", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": {"class_name": "__tuple__", "items": [null, 3400]}, "stateful": false, "must_restore_from_config": false, "class_name": "Embedding", "config": {"name": "embedding", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 3400]}, "dtype": "float32", "input_dim": 129, "output_dim": 64, "embeddings_initializer": {"class_name": "RandomUniform", "config": {"minval": -0.05, "maxval": 0.05, "seed": null}, "shared_object_id": 2}, "embeddings_regularizer": null, "activity_regularizer": null, "embeddings_constraint": null, "mask_zero": false, "input_length": 3400}, "inbound_nodes": [[["lambda", 0, 0, {}]]], "shared_object_id": 3, "build_input_shape": {"class_name": "TensorShape", "items": [null, null]}}2 -ç root.layer_with_weights-1"_tf_keras_layer*° {"name": "conv1d", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Conv1D", "config": {"name": "conv1d", "trainable": true, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [13]}, "strides": {"class_name": "__tuple__", "items": [1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 4}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 5}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["embedding", 0, 0, {}]]], "shared_object_id": 6, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 3, "axes": {"-1": 64}}, "shared_object_id": 56}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 64]}}2 -¦ root.layer-4"_tf_keras_layer*ü{"name": "dropout", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dropout", "config": {"name": "dropout", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "inbound_nodes": [[["conv1d", 0, 0, {}]]], "shared_object_id": 7}2 -· root.layer_with_weights-2"_tf_keras_layer*€ {"name": "batch_normalization", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "BatchNormalization", "config": {"name": "batch_normalization", "trainable": true, "dtype": "float32", "axis": [2], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 8}, "gamma_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 9}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 10}, "moving_variance_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 11}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["dropout", 0, 0, {}]]], "shared_object_id": 12, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {"2": 48}}, "shared_object_id": 57}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 48]}}2 -ø root.layer_with_weights-3"_tf_keras_layer*Á {"name": "conv1d_1", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Conv1D", "config": {"name": "conv1d_1", "trainable": true, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [13]}, "strides": {"class_name": "__tuple__", "items": [1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 13}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 14}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["batch_normalization", 0, 0, {}]]], "shared_object_id": 15, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 3, "axes": {"-1": 48}}, "shared_object_id": 58}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 48]}}2 -­ root.layer-7"_tf_keras_layer*ƒ{"name": "dropout_1", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "inbound_nodes": [[["conv1d_1", 0, 0, {}]]], "shared_object_id": 16}2 -¿  root.layer_with_weights-4"_tf_keras_layer*ˆ {"name": "batch_normalization_1", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "BatchNormalization", "config": {"name": "batch_normalization_1", "trainable": true, "dtype": "float32", "axis": [2], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 17}, "gamma_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 18}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 19}, "moving_variance_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 20}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["dropout_1", 0, 0, {}]]], "shared_object_id": 21, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {"2": 48}}, "shared_object_id": 59}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 48]}}2 -ú  -root.layer_with_weights-5"_tf_keras_layer*à {"name": "conv1d_2", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Conv1D", "config": {"name": "conv1d_2", "trainable": true, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [13]}, "strides": {"class_name": "__tuple__", "items": [1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 22}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 23}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["batch_normalization_1", 0, 0, {}]]], "shared_object_id": 24, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 3, "axes": {"-1": 48}}, "shared_object_id": 60}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 48]}}2 -®  root.layer-10"_tf_keras_layer*ƒ{"name": "dropout_2", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dropout", "config": {"name": "dropout_2", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "inbound_nodes": [[["conv1d_2", 0, 0, {}]]], "shared_object_id": 25}2 -¿  root.layer_with_weights-6"_tf_keras_layer*ˆ {"name": "batch_normalization_2", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "BatchNormalization", "config": {"name": "batch_normalization_2", "trainable": true, "dtype": "float32", "axis": [2], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 26}, "gamma_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 27}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 28}, "moving_variance_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 29}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["dropout_2", 0, 0, {}]]], "shared_object_id": 30, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {"2": 48}}, "shared_object_id": 61}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 48]}}2 -ú  root.layer_with_weights-7"_tf_keras_layer*à {"name": "conv1d_3", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Conv1D", "config": {"name": "conv1d_3", "trainable": true, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [13]}, "strides": {"class_name": "__tuple__", "items": [1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 31}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 32}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["batch_normalization_2", 0, 0, {}]]], "shared_object_id": 33, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 3, "axes": {"-1": 48}}, "shared_object_id": 62}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 48]}}2 -® root.layer-13"_tf_keras_layer*ƒ{"name": "dropout_3", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dropout", "config": {"name": "dropout_3", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "inbound_nodes": [[["conv1d_3", 0, 0, {}]]], "shared_object_id": 34}2 -¿ root.layer_with_weights-8"_tf_keras_layer*ˆ {"name": "batch_normalization_3", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "BatchNormalization", "config": {"name": "batch_normalization_3", "trainable": true, "dtype": "float32", "axis": [2], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 35}, "gamma_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 36}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 37}, "moving_variance_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 38}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["dropout_3", 0, 0, {}]]], "shared_object_id": 39, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {"2": 48}}, "shared_object_id": 63}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 48]}}2 -„root.layer_with_weights-9"_tf_keras_layer*Í{"name": "dense", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 96, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 40}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 41}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["batch_normalization_3", 0, 0, {}]]], "shared_object_id": 42, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 48}}, "shared_object_id": 64}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 48]}}2 -« root.layer-16"_tf_keras_layer*€{"name": "dropout_4", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dropout", "config": {"name": "dropout_4", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "inbound_nodes": [[["dense", 0, 0, {}]]], "shared_object_id": 43}2 -ýroot.layer_with_weights-10"_tf_keras_layer*Å{"name": "dense_1", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 96, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 44}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 45}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dropout_4", 0, 0, {}]]], "shared_object_id": 46, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 96}}, "shared_object_id": 65}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 96]}}2 -­ root.layer-18"_tf_keras_layer*‚{"name": "dropout_5", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dropout", "config": {"name": "dropout_5", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "inbound_nodes": [[["dense_1", 0, 0, {}]]], "shared_object_id": 47}2 -€root.layer_with_weights-11"_tf_keras_layer*È{"name": "dense_2", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 24, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 48}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 49}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dropout_5", 0, 0, {}]]], "shared_object_id": 50, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 96}}, "shared_object_id": 66}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 96]}}2 -í root.layer-20"_tf_keras_layer*Â{"name": "tf_op_layer_ArgMax", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": true, "class_name": "TensorFlowOpLayer", "config": {"name": "ArgMax", "trainable": true, "dtype": "float32", "node_def": {"name": "ArgMax", "op": "ArgMax", "input": ["dense_2/truediv", "ArgMax/dimension"], "attr": {"Tidx": {"type": "DT_INT32"}, "output_type": {"type": "DT_INT64"}, "T": {"type": "DT_FLOAT"}}}, "constants": {"1": -1}}, "inbound_nodes": [[["dense_2", 0, 0, {}]]], "shared_object_id": 51}2 -Æroot.layer_with_weights-12"_tf_keras_layer*Ž{"name": "thresh_arg_max_layer", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "ThreshArgMaxLayer", "config": {"layer was saved without config": true}}2 -º“root.keras_api.metrics.0"_tf_keras_metric*‚{"class_name": "Mean", "name": "loss", "dtype": "float32", "config": {"name": "loss", "dtype": "float32"}, "shared_object_id": 67}2 -Ê”root.keras_api.metrics.1"_tf_keras_metric*’{"class_name": "Mean", "name": "dense_2_loss", "dtype": "float32", "config": {"name": "dense_2_loss", "dtype": "float32"}, "shared_object_id": 68}2 -ã•root.keras_api.metrics.2"_tf_keras_metric*«{"class_name": "MeanMetricWrapper", "name": "acc", "dtype": "float32", "config": {"name": "acc", "dtype": "float32", "fn": "categorical_accuracy"}, "shared_object_id": 54}2 -––root.keras_api.metrics.3"_tf_keras_metric*Þ{"class_name": "Custom>F1Score", "name": "dense_2_f1_score", "dtype": "float32", "config": {"name": "dense_2_f1_score", "dtype": "float32", "num_classes": 24, "average": "micro", "threshold": null}, "shared_object_id": 55}2 \ No newline at end of file diff --git a/resources/labelers/structured_model/variables/variables.data-00000-of-00001 b/resources/labelers/structured_model/model.keras similarity index 88% rename from resources/labelers/structured_model/variables/variables.data-00000-of-00001 rename to resources/labelers/structured_model/model.keras index 95732bf16..795d637da 100644 Binary files a/resources/labelers/structured_model/variables/variables.data-00000-of-00001 and b/resources/labelers/structured_model/model.keras differ diff --git a/resources/labelers/structured_model/saved_model.pb b/resources/labelers/structured_model/saved_model.pb deleted file mode 100644 index 76274cae0..000000000 Binary files a/resources/labelers/structured_model/saved_model.pb and /dev/null differ diff --git a/resources/labelers/structured_model/variables/variables.index b/resources/labelers/structured_model/variables/variables.index deleted file mode 100644 index 627e9a577..000000000 Binary files a/resources/labelers/structured_model/variables/variables.index and /dev/null differ diff --git a/resources/labelers/unstructured_model/keras_metadata.pb b/resources/labelers/unstructured_model/keras_metadata.pb deleted file mode 100644 index dcc84a213..000000000 --- a/resources/labelers/unstructured_model/keras_metadata.pb +++ /dev/null @@ -1,29 +0,0 @@ - -ã`root"_tf_keras_network*Á`{"name": "functional_1", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "class_name": "Functional", "config": {"name": "functional_1", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, null]}, "dtype": "string", "sparse": false, "ragged": false, "name": "input_1"}, "name": "input_1", "inbound_nodes": []}, {"class_name": "Lambda", "config": {"name": "lambda", "trainable": true, "dtype": "float32", "function": {"class_name": "__tuple__", "items": ["4wEAAAAAAAAAAgAAAAQAAAATAAAAcxIAAAB0AGoBfACIAIgBgwN9AXwBUwApAU4pAtoWQ2hhcmFj\ndGVyTGV2ZWxDbm5Nb2RlbNoUX2NoYXJfZW5jb2RpbmdfbGF5ZXIpAtoJaW5wdXRfc3RyWg5jaGFy\nX2luX3ZlY3RvcikC2hRtYXhfY2hhcl9lbmNvZGluZ19pZNoKbWF4X2xlbmd0aKkA+lMvaG9tZS91\nYnVudHUvbmV3LWRwL0RhdGFQcm9maWxlci9kYXRhcHJvZmlsZXIvbGFiZWxlcnMvY2hhcmFjdGVy\nX2xldmVsX2Nubl9tb2RlbC5wedoRZW5jb2RpbmdfZnVuY3Rpb25TAgAAcwYAAAAAAQQBCgE=\n", null, {"class_name": "__tuple__", "items": [127, 3400]}]}, "function_type": "lambda", "module": "dataprofiler.labelers.character_level_cnn_model", "output_shape": {"class_name": "__tuple__", "items": [3400]}, "output_shape_type": "raw", "output_shape_module": null, "arguments": {}}, "name": "lambda", "inbound_nodes": [[["input_1", 0, 0, {}]]]}, {"class_name": "Embedding", "config": {"name": "embedding", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 3400]}, "dtype": "float32", "input_dim": 129, "output_dim": 64, "embeddings_initializer": {"class_name": "RandomUniform", "config": {"minval": -0.05, "maxval": 0.05, "seed": null}}, "embeddings_regularizer": null, "activity_regularizer": null, "embeddings_constraint": null, "mask_zero": false, "input_length": 3400}, "name": "embedding", "inbound_nodes": [[["lambda", 0, 0, {}]]]}, {"class_name": "Conv1D", "config": {"name": "conv1d", "trainable": true, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [13]}, "strides": {"class_name": "__tuple__", "items": [1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "conv1d", "inbound_nodes": [[["embedding", 0, 0, {}]]]}, {"class_name": "Dropout", "config": {"name": "dropout", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "name": "dropout", "inbound_nodes": [[["conv1d", 0, 0, {}]]]}, {"class_name": "BatchNormalization", "config": {"name": "batch_normalization", "trainable": true, "dtype": "float32", "axis": [2], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "name": "batch_normalization", "inbound_nodes": [[["dropout", 0, 0, {}]]]}, {"class_name": "Conv1D", "config": {"name": "conv1d_1", "trainable": true, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [13]}, "strides": {"class_name": "__tuple__", "items": [1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "conv1d_1", "inbound_nodes": [[["batch_normalization", 0, 0, {}]]]}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "name": "dropout_1", "inbound_nodes": [[["conv1d_1", 0, 0, {}]]]}, {"class_name": "BatchNormalization", "config": {"name": "batch_normalization_1", "trainable": true, "dtype": "float32", "axis": [2], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "name": "batch_normalization_1", "inbound_nodes": [[["dropout_1", 0, 0, {}]]]}, {"class_name": "Conv1D", "config": {"name": "conv1d_2", "trainable": true, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [13]}, "strides": {"class_name": "__tuple__", "items": [1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "conv1d_2", "inbound_nodes": [[["batch_normalization_1", 0, 0, {}]]]}, {"class_name": "Dropout", "config": {"name": "dropout_2", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "name": "dropout_2", "inbound_nodes": [[["conv1d_2", 0, 0, {}]]]}, {"class_name": "BatchNormalization", "config": {"name": "batch_normalization_2", "trainable": true, "dtype": "float32", "axis": [2], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "name": "batch_normalization_2", "inbound_nodes": [[["dropout_2", 0, 0, {}]]]}, {"class_name": "Conv1D", "config": {"name": "conv1d_3", "trainable": true, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [13]}, "strides": {"class_name": "__tuple__", "items": [1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "conv1d_3", "inbound_nodes": [[["batch_normalization_2", 0, 0, {}]]]}, {"class_name": "Dropout", "config": {"name": "dropout_3", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "name": "dropout_3", "inbound_nodes": [[["conv1d_3", 0, 0, {}]]]}, {"class_name": "BatchNormalization", "config": {"name": "batch_normalization_3", "trainable": true, "dtype": "float32", "axis": [2], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "name": "batch_normalization_3", "inbound_nodes": [[["dropout_3", 0, 0, {}]]]}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 96, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense", "inbound_nodes": [[["batch_normalization_3", 0, 0, {}]]]}, {"class_name": "Dropout", "config": {"name": "dropout_4", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "name": "dropout_4", "inbound_nodes": [[["dense", 0, 0, {}]]]}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 96, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense_1", "inbound_nodes": [[["dropout_4", 0, 0, {}]]]}, {"class_name": "Dropout", "config": {"name": "dropout_5", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "name": "dropout_5", "inbound_nodes": [[["dense_1", 0, 0, {}]]]}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 24, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense_2", "inbound_nodes": [[["dropout_5", 0, 0, {}]]]}, {"class_name": "TensorFlowOpLayer", "config": {"name": "ArgMax", "trainable": true, "dtype": "float32", "node_def": {"name": "ArgMax", "op": "ArgMax", "input": ["dense_2/truediv", "ArgMax/dimension"], "attr": {"Tidx": {"type": "DT_INT32"}, "output_type": {"type": "DT_INT64"}, "T": {"type": "DT_FLOAT"}}}, "constants": {"1": -1}}, "name": "tf_op_layer_ArgMax", "inbound_nodes": [[["dense_2", 0, 0, {}]]]}, {"class_name": "ThreshArgMaxLayer", "config": {"layer was saved without config": true}, "name": "thresh_arg_max_layer", "inbound_nodes": [[["tf_op_layer_ArgMax", 0, 0, {"confidence_layer": ["dense_2", 0, 0]}]]]}], "input_layers": [["input_1", 0, 0]], "output_layers": [["dense_2", 0, 0], ["tf_op_layer_ArgMax", 0, 0], ["thresh_arg_max_layer", 0, 0]]}, "shared_object_id": 52, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, null]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}], "build_input_shape": {"class_name": "TensorShape", "items": [null, null]}, "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "string", "input_1"]}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, null]}, "string", "input_1"]}, "keras_version": "2.6.0", "backend": "tensorflow", "model_config": {"class_name": "Functional"}, "training_config": {"loss": {"dense_2": "categorical_crossentropy"}, "metrics": [[{"class_name": "MeanMetricWrapper", "config": {"name": "acc", "dtype": "float32", "fn": "categorical_accuracy"}, "shared_object_id": 54}, {"class_name": "Custom>F1Score", "config": {"name": "dense_2_f1_score", "dtype": "float32", "num_classes": 24, "average": "micro", "threshold": null}, "shared_object_id": 55}], [null], [null]], "weighted_metrics": null, "loss_weights": null, "optimizer_config": {"class_name": "Adam", "config": {"name": "Adam", "learning_rate": 0.0010000000474974513, "decay": 0.0, "beta_1": 0.8999999761581421, "beta_2": 0.9990000128746033, "epsilon": 1e-07, "amsgrad": false}}}}2 -ú root.layer-0"_tf_keras_input_layer*Ê{"class_name": "InputLayer", "name": "input_1", "dtype": "string", "sparse": false, "ragged": false, "batch_input_shape": {"class_name": "__tuple__", "items": [null, null]}, "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, null]}, "dtype": "string", "sparse": false, "ragged": false, "name": "input_1"}}2 -Å root.layer-1"_tf_keras_layer*›{"name": "lambda", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Lambda", "config": {"name": "lambda", "trainable": true, "dtype": "float32", "function": {"class_name": "__tuple__", "items": ["4wEAAAAAAAAAAgAAAAQAAAATAAAAcxIAAAB0AGoBfACIAIgBgwN9AXwBUwApAU4pAtoWQ2hhcmFj\ndGVyTGV2ZWxDbm5Nb2RlbNoUX2NoYXJfZW5jb2RpbmdfbGF5ZXIpAtoJaW5wdXRfc3RyWg5jaGFy\nX2luX3ZlY3RvcikC2hRtYXhfY2hhcl9lbmNvZGluZ19pZNoKbWF4X2xlbmd0aKkA+lMvaG9tZS91\nYnVudHUvbmV3LWRwL0RhdGFQcm9maWxlci9kYXRhcHJvZmlsZXIvbGFiZWxlcnMvY2hhcmFjdGVy\nX2xldmVsX2Nubl9tb2RlbC5wedoRZW5jb2RpbmdfZnVuY3Rpb25TAgAAcwYAAAAAAQQBCgE=\n", null, {"class_name": "__tuple__", "items": [127, 3400]}]}, "function_type": "lambda", "module": "dataprofiler.labelers.character_level_cnn_model", "output_shape": {"class_name": "__tuple__", "items": [3400]}, "output_shape_type": "raw", "output_shape_module": null, "arguments": {}}, "inbound_nodes": [[["input_1", 0, 0, {}]]], "shared_object_id": 1}2 -‚root.layer_with_weights-0"_tf_keras_layer*Ë{"name": "embedding", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": {"class_name": "__tuple__", "items": [null, 3400]}, "stateful": false, "must_restore_from_config": false, "class_name": "Embedding", "config": {"name": "embedding", "trainable": true, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 3400]}, "dtype": "float32", "input_dim": 129, "output_dim": 64, "embeddings_initializer": {"class_name": "RandomUniform", "config": {"minval": -0.05, "maxval": 0.05, "seed": null}, "shared_object_id": 2}, "embeddings_regularizer": null, "activity_regularizer": null, "embeddings_constraint": null, "mask_zero": false, "input_length": 3400}, "inbound_nodes": [[["lambda", 0, 0, {}]]], "shared_object_id": 3, "build_input_shape": {"class_name": "TensorShape", "items": [null, null]}}2 -ç root.layer_with_weights-1"_tf_keras_layer*° {"name": "conv1d", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Conv1D", "config": {"name": "conv1d", "trainable": true, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [13]}, "strides": {"class_name": "__tuple__", "items": [1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 4}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 5}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["embedding", 0, 0, {}]]], "shared_object_id": 6, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 3, "axes": {"-1": 64}}, "shared_object_id": 56}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 64]}}2 -¦ root.layer-4"_tf_keras_layer*ü{"name": "dropout", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dropout", "config": {"name": "dropout", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "inbound_nodes": [[["conv1d", 0, 0, {}]]], "shared_object_id": 7}2 -· root.layer_with_weights-2"_tf_keras_layer*€ {"name": "batch_normalization", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "BatchNormalization", "config": {"name": "batch_normalization", "trainable": true, "dtype": "float32", "axis": [2], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 8}, "gamma_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 9}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 10}, "moving_variance_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 11}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["dropout", 0, 0, {}]]], "shared_object_id": 12, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {"2": 48}}, "shared_object_id": 57}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 48]}}2 -ø root.layer_with_weights-3"_tf_keras_layer*Á {"name": "conv1d_1", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Conv1D", "config": {"name": "conv1d_1", "trainable": true, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [13]}, "strides": {"class_name": "__tuple__", "items": [1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 13}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 14}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["batch_normalization", 0, 0, {}]]], "shared_object_id": 15, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 3, "axes": {"-1": 48}}, "shared_object_id": 58}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 48]}}2 -­ root.layer-7"_tf_keras_layer*ƒ{"name": "dropout_1", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "inbound_nodes": [[["conv1d_1", 0, 0, {}]]], "shared_object_id": 16}2 -¿  root.layer_with_weights-4"_tf_keras_layer*ˆ {"name": "batch_normalization_1", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "BatchNormalization", "config": {"name": "batch_normalization_1", "trainable": true, "dtype": "float32", "axis": [2], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 17}, "gamma_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 18}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 19}, "moving_variance_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 20}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["dropout_1", 0, 0, {}]]], "shared_object_id": 21, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {"2": 48}}, "shared_object_id": 59}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 48]}}2 -ú  -root.layer_with_weights-5"_tf_keras_layer*à {"name": "conv1d_2", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Conv1D", "config": {"name": "conv1d_2", "trainable": true, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [13]}, "strides": {"class_name": "__tuple__", "items": [1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 22}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 23}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["batch_normalization_1", 0, 0, {}]]], "shared_object_id": 24, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 3, "axes": {"-1": 48}}, "shared_object_id": 60}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 48]}}2 -®  root.layer-10"_tf_keras_layer*ƒ{"name": "dropout_2", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dropout", "config": {"name": "dropout_2", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "inbound_nodes": [[["conv1d_2", 0, 0, {}]]], "shared_object_id": 25}2 -¿  root.layer_with_weights-6"_tf_keras_layer*ˆ {"name": "batch_normalization_2", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "BatchNormalization", "config": {"name": "batch_normalization_2", "trainable": true, "dtype": "float32", "axis": [2], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 26}, "gamma_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 27}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 28}, "moving_variance_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 29}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["dropout_2", 0, 0, {}]]], "shared_object_id": 30, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {"2": 48}}, "shared_object_id": 61}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 48]}}2 -ú  root.layer_with_weights-7"_tf_keras_layer*à {"name": "conv1d_3", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Conv1D", "config": {"name": "conv1d_3", "trainable": true, "dtype": "float32", "filters": 48, "kernel_size": {"class_name": "__tuple__", "items": [13]}, "strides": {"class_name": "__tuple__", "items": [1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1]}, "groups": 1, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 31}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 32}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["batch_normalization_2", 0, 0, {}]]], "shared_object_id": 33, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 3, "axes": {"-1": 48}}, "shared_object_id": 62}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 48]}}2 -® root.layer-13"_tf_keras_layer*ƒ{"name": "dropout_3", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dropout", "config": {"name": "dropout_3", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "inbound_nodes": [[["conv1d_3", 0, 0, {}]]], "shared_object_id": 34}2 -¿ root.layer_with_weights-8"_tf_keras_layer*ˆ {"name": "batch_normalization_3", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "BatchNormalization", "config": {"name": "batch_normalization_3", "trainable": true, "dtype": "float32", "axis": [2], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 35}, "gamma_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 36}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 37}, "moving_variance_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 38}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["dropout_3", 0, 0, {}]]], "shared_object_id": 39, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {"2": 48}}, "shared_object_id": 63}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 48]}}2 -„root.layer_with_weights-9"_tf_keras_layer*Í{"name": "dense", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 96, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 40}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 41}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["batch_normalization_3", 0, 0, {}]]], "shared_object_id": 42, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 48}}, "shared_object_id": 64}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 48]}}2 -« root.layer-16"_tf_keras_layer*€{"name": "dropout_4", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dropout", "config": {"name": "dropout_4", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "inbound_nodes": [[["dense", 0, 0, {}]]], "shared_object_id": 43}2 -ýroot.layer_with_weights-10"_tf_keras_layer*Å{"name": "dense_1", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 96, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 44}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 45}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dropout_4", 0, 0, {}]]], "shared_object_id": 46, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 96}}, "shared_object_id": 65}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 96]}}2 -­ root.layer-18"_tf_keras_layer*‚{"name": "dropout_5", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dropout", "config": {"name": "dropout_5", "trainable": true, "dtype": "float32", "rate": 0.073, "noise_shape": null, "seed": null}, "inbound_nodes": [[["dense_1", 0, 0, {}]]], "shared_object_id": 47}2 -€root.layer_with_weights-11"_tf_keras_layer*È{"name": "dense_2", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 24, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 48}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 49}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dropout_5", 0, 0, {}]]], "shared_object_id": 50, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 96}}, "shared_object_id": 66}, "build_input_shape": {"class_name": "TensorShape", "items": [null, null, 96]}}2 -í root.layer-20"_tf_keras_layer*Â{"name": "tf_op_layer_ArgMax", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": true, "class_name": "TensorFlowOpLayer", "config": {"name": "ArgMax", "trainable": true, "dtype": "float32", "node_def": {"name": "ArgMax", "op": "ArgMax", "input": ["dense_2/truediv", "ArgMax/dimension"], "attr": {"Tidx": {"type": "DT_INT32"}, "output_type": {"type": "DT_INT64"}, "T": {"type": "DT_FLOAT"}}}, "constants": {"1": -1}}, "inbound_nodes": [[["dense_2", 0, 0, {}]]], "shared_object_id": 51}2 -Æroot.layer_with_weights-12"_tf_keras_layer*Ž{"name": "thresh_arg_max_layer", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "ThreshArgMaxLayer", "config": {"layer was saved without config": true}}2 -º“root.keras_api.metrics.0"_tf_keras_metric*‚{"class_name": "Mean", "name": "loss", "dtype": "float32", "config": {"name": "loss", "dtype": "float32"}, "shared_object_id": 67}2 -Ê”root.keras_api.metrics.1"_tf_keras_metric*’{"class_name": "Mean", "name": "dense_2_loss", "dtype": "float32", "config": {"name": "dense_2_loss", "dtype": "float32"}, "shared_object_id": 68}2 -ã•root.keras_api.metrics.2"_tf_keras_metric*«{"class_name": "MeanMetricWrapper", "name": "acc", "dtype": "float32", "config": {"name": "acc", "dtype": "float32", "fn": "categorical_accuracy"}, "shared_object_id": 54}2 -––root.keras_api.metrics.3"_tf_keras_metric*Þ{"class_name": "Custom>F1Score", "name": "dense_2_f1_score", "dtype": "float32", "config": {"name": "dense_2_f1_score", "dtype": "float32", "num_classes": 24, "average": "micro", "threshold": null}, "shared_object_id": 55}2 \ No newline at end of file diff --git a/resources/labelers/unstructured_model/variables/variables.data-00000-of-00001 b/resources/labelers/unstructured_model/model.keras similarity index 88% rename from resources/labelers/unstructured_model/variables/variables.data-00000-of-00001 rename to resources/labelers/unstructured_model/model.keras index 95732bf16..795d637da 100644 Binary files a/resources/labelers/unstructured_model/variables/variables.data-00000-of-00001 and b/resources/labelers/unstructured_model/model.keras differ diff --git a/resources/labelers/unstructured_model/saved_model.pb b/resources/labelers/unstructured_model/saved_model.pb deleted file mode 100644 index 76274cae0..000000000 Binary files a/resources/labelers/unstructured_model/saved_model.pb and /dev/null differ diff --git a/resources/labelers/unstructured_model/variables/variables.index b/resources/labelers/unstructured_model/variables/variables.index deleted file mode 100644 index 627e9a577..000000000 Binary files a/resources/labelers/unstructured_model/variables/variables.index and /dev/null differ diff --git a/setup.py b/setup.py index f8b5eaf8e..eeca6629b 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ setup( name="DataProfiler", version=__version__, - python_requires=">=3.8", + python_requires=">=3.9", description=DESCRIPTION, long_description=LONG_DESCRIPTION, long_description_content_type="text/markdown", diff --git a/tox.ini b/tox.ini index 55fa50147..21d418e98 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py37, py38, py39, py310, docs, pypi-description, manifest, precom +envlist = py39, py310, 311, pypi-description, manifest, precom [testenv] @@ -16,32 +16,37 @@ deps = -rrequirements-reports.txt -rrequirements-test.txt commands = - python3 -m pytest dataprofiler/tests/ --cov=dataprofiler --cov-fail-under=80 --cov-report=xml:dist/coverage.xml --forked + python3 -m pytest dataprofiler/tests/ --cov=dataprofiler --cov-fail-under=80 --cov-report=xml:coverage.xml --forked +# add "docs" to `envlist` to run the docs build #[testenv:docs] #extras = docs #changedir = docs #commands = sphinx-build -b html source _build [testenv:pypi-description] -skip_install = true deps = + {[testenv]deps} twine wheel - pip >= 19.0.0 +skip_install = true commands = python setup.py sdist bdist_wheel twine check dist/* [testenv:manifest] -deps = check-manifest +deps = + {[testenv]deps} + check-manifest skip_install = true commands = check-manifest # skip isort for infinite loop issues between tox and top level settings [testenv:precom] skip_install = true -deps = pre-commit +deps = + {[testenv]deps} + pre-commit commands = pre-commit run black --all-files --verbose # if you use the walrus operator on Python 3.8 disable the flake8 check