diff --git a/docs/source/misc/changelog.md b/docs/source/misc/changelog.md index 939298f2..0e8f91ac 100644 --- a/docs/source/misc/changelog.md +++ b/docs/source/misc/changelog.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.1.11] - 2022-01-18 + +### Changed + +- Adding rank feature to serving parse fn by default and removing dependence on required serving_info attribute + ## [0.1.10] - 2021-12-29 ### Changed diff --git a/python/ml4ir/applications/ranking/tests/data/configs/feature_config.yaml b/python/ml4ir/applications/ranking/tests/data/configs/feature_config.yaml index a2b0fc33..9cbc0662 100644 --- a/python/ml4ir/applications/ranking/tests/data/configs/feature_config.yaml +++ b/python/ml4ir/applications/ranking/tests/data/configs/feature_config.yaml @@ -9,8 +9,6 @@ query_key: shape: null serving_info: name: queryId - required: false - default_value: "" tfrecord_type: context rank: name: rank @@ -23,7 +21,6 @@ rank: shape: null serving_info: name: originalRank - required: true default_value: 0 tfrecord_type: sequence label: @@ -37,8 +34,6 @@ label: shape: null serving_info: name: clicked - required: false - default_value: 0 tfrecord_type: sequence features: - name: text_match_score @@ -51,8 +46,6 @@ features: shape: null serving_info: name: textMatchScore - required: true - default_value: 0.0 tfrecord_type: sequence - name: page_views_score node_name: page_views_score @@ -77,8 +70,6 @@ features: clip_value_max: 1000000. 
serving_info: name: pageViewsScore - required: true - default_value: 0.0 tfrecord_type: sequence - name: quality_score node_name: quality_score @@ -90,7 +81,6 @@ features: shape: null serving_info: name: qualityScore - required: false tfrecord_type: sequence - name: name_match node_name: name_match @@ -103,8 +93,6 @@ features: shape: null serving_info: name: nameMatch - required: true - default_value: 0.0 tfrecord_type: sequence - name: query_text node_name: query_text @@ -127,8 +115,6 @@ features: to_lower: true serving_info: name: q - required: true - default_value: "" tfrecord_type: context - name: domain_id node_name: domain_id @@ -146,8 +132,6 @@ features: default_value: null serving_info: name: domainID - required: true - default_value: 0 tfrecord_type: context - name: domain_name node_name: domain_name @@ -166,7 +150,5 @@ features: num_oov_buckets: 1 serving_info: name: domainName - required: true - default_value: "" tfrecord_type: context diff --git a/python/ml4ir/base/data/tfrecord_reader.py b/python/ml4ir/base/data/tfrecord_reader.py index 5962273f..2fc0cacf 100644 --- a/python/ml4ir/base/data/tfrecord_reader.py +++ b/python/ml4ir/base/data/tfrecord_reader.py @@ -287,11 +287,12 @@ def get_features_spec(self): for feature_info in self.feature_config.get_all_features(): serving_info = feature_info["serving_info"] if not self.required_fields_only or serving_info.get( - "required", feature_info["trainable"]) or feature_info["trainable"]: + "required", feature_info["trainable"]) or feature_info["trainable"]: feature_name = feature_info["name"] dtype = feature_info["dtype"] default_value = self.feature_config.get_default_value(feature_info) - features_spec[feature_name] = io.FixedLenFeature([], dtype, default_value=default_value) + features_spec[feature_name] = io.FixedLenFeature( + [], dtype, default_value=default_value) return features_spec @@ -354,7 +355,7 @@ def get_feature(self, feature_info, extracted_features, sequence_size=0): default_tensor = 
self.get_default_tensor(feature_info, sequence_size) feature_tensor = extracted_features.get(feature_info["name"], default_tensor) - + # Adjust shape feature_tensor = tf.expand_dims(feature_tensor, axis=0) @@ -455,8 +456,10 @@ def get_features_spec(self): if feature_info.get("name") == self.feature_config.get_mask("name"): continue serving_info = feature_info["serving_info"] - if not self.required_fields_only or serving_info.get( - "required", feature_info["trainable"]) or feature_info["trainable"]: + if not self.required_fields_only or feature_info["trainable"] or \ + (serving_info.get("required", feature_info["trainable"])) or \ + (feature_info.get("name") == self.feature_config.get_rank("name")): + feature_name = feature_info["name"] dtype = feature_info["dtype"] default_value = self.feature_config.get_default_value( @@ -585,7 +588,7 @@ def generate_and_add_mask(self, extracted_features, features_dict): context_features, sequence_features = extracted_features if ( self.required_fields_only - and not self.feature_config.get_rank("serving_info")["required"] + and not self.feature_config.get_rank("serving_info").get("required", True) ): """ Define dummy mask if the rank field is not a required field for serving diff --git a/python/ml4ir/base/features/feature_config.py b/python/ml4ir/base/features/feature_config.py index f1e2d356..1add9f1a 100644 --- a/python/ml4ir/base/features/feature_config.py +++ b/python/ml4ir/base/features/feature_config.py @@ -1018,7 +1018,10 @@ def create_dummy_protobuf(self, num_records=1, required_only=False): sequence_features = [ f for f in self.get_sequence_features() - if ((not required_only) or (f["serving_info"].get("required", False)) or f["trainable"]) + if ((not required_only) or \ + (f["serving_info"].get("required", False)) or \ + f["trainable"] or \ + (f["name"] == self.get_rank("name"))) ] dummy_query = dict() diff --git a/python/setup.py b/python/setup.py index 02061979..e6d13082 100644 --- a/python/setup.py +++ 
b/python/setup.py @@ -24,7 +24,7 @@ def getReadMe(): setup( name="ml4ir", packages=find_namespace_packages(include=["ml4ir.*"]), - version="0.1.10", + version="0.1.11", description="Machine Learning libraries for Information Retrieval", long_description=getReadMe(), long_description_content_type="text/markdown",