From 8524de9644577e3b3027be5716f017b4a08867f3 Mon Sep 17 00:00:00 2001
From: Antonin Delpeuch
Date: Thu, 14 Dec 2023 10:26:29 +0100
Subject: [PATCH 1/3] Move 'query' field inside 'properties'
Closes #134. Closes #106.
---
.../invalid/misnamed-property.json | 8 +++--
.../invalid/multiple-types.json | 11 ++++--
.../valid/example-full.json | 8 +++--
.../valid/example-min.json | 12 +++++--
.../valid/multi-values.json | 4 ++-
.../valid/text-processing-language.json | 4 ++-
draft/index.html | 34 +++++++++++--------
draft/schemas/reconciliation-query-batch.json | 29 +++-------------
8 files changed, 62 insertions(+), 48 deletions(-)
diff --git a/draft/examples/reconciliation-query-batch/invalid/misnamed-property.json b/draft/examples/reconciliation-query-batch/invalid/misnamed-property.json
index 5781c9a..f51e78e 100644
--- a/draft/examples/reconciliation-query-batch/invalid/misnamed-property.json
+++ b/draft/examples/reconciliation-query-batch/invalid/misnamed-property.json
@@ -1,10 +1,12 @@
{
"queries": [
{
- "query": "Christel Hanewinckel",
"type": "DifferentiatedPerson",
"limit": 5,
"props": [
+ {
+ "v": "Christel Hanewinckel"
+ },
{
"pid": "professionOrOccupation",
"v": "Politik*"
@@ -17,10 +19,12 @@
"type_strict": "should"
},
{
- "query": "Franz Thönnes",
"type": "DifferentiatedPerson",
"limit": 5,
"props": [
+ {
+ "v": "Franz Thönnes"
+ },
{
"pid": "professionOrOccupation",
"v": "Politik*"
diff --git a/draft/examples/reconciliation-query-batch/invalid/multiple-types.json b/draft/examples/reconciliation-query-batch/invalid/multiple-types.json
index b2a0f8d..ee00a4e 100644
--- a/draft/examples/reconciliation-query-batch/invalid/multiple-types.json
+++ b/draft/examples/reconciliation-query-batch/invalid/multiple-types.json
@@ -1,8 +1,15 @@
{
"queries": [
{
- "query": "Christel Hanewinckel",
- "type": ["DifferentiatedPerson", "FictionalCharacter"],
+ "properties": [
+ {
+ "v": "Christel Hanewinckel"
+ }
+ ],
+ "type": [
+ "DifferentiatedPerson",
+ "FictionalCharacter"
+ ],
"limit": 5
}
]
diff --git a/draft/examples/reconciliation-query-batch/valid/example-full.json b/draft/examples/reconciliation-query-batch/valid/example-full.json
index 5ddacf5..fc2c6fa 100644
--- a/draft/examples/reconciliation-query-batch/valid/example-full.json
+++ b/draft/examples/reconciliation-query-batch/valid/example-full.json
@@ -1,10 +1,12 @@
{
"queries": [
{
- "query": "Christel Hanewinckel",
"type": "DifferentiatedPerson",
"limit": 5,
"properties": [
+ {
+ "v": "Christel Hanewinckel"
+ },
{
"pid": "professionOrOccupation",
"v": "Politik*"
@@ -16,10 +18,12 @@
]
},
{
- "query": "Franz Thönnes",
"type": "DifferentiatedPerson",
"limit": 5,
"properties": [
+ {
+ "v": "Franz Thönnes"
+ },
{
"pid": "professionOrOccupation",
"v": "Politik*"
diff --git a/draft/examples/reconciliation-query-batch/valid/example-min.json b/draft/examples/reconciliation-query-batch/valid/example-min.json
index 130e18b..785eda3 100644
--- a/draft/examples/reconciliation-query-batch/valid/example-min.json
+++ b/draft/examples/reconciliation-query-batch/valid/example-min.json
@@ -1,10 +1,18 @@
{
"queries": [
{
- "query": "Hans-Eberhard Urbaniak"
+ "properties": [
+ {
+ "v": "Hans-Eberhard Urbaniak"
+ }
+ ]
},
{
- "query": "Ernst Schwanhold"
+ "properties": [
+ {
+ "v": "Ernst Schwanhold"
+ }
+ ]
}
]
}
diff --git a/draft/examples/reconciliation-query-batch/valid/multi-values.json b/draft/examples/reconciliation-query-batch/valid/multi-values.json
index aec03de..caaa898 100644
--- a/draft/examples/reconciliation-query-batch/valid/multi-values.json
+++ b/draft/examples/reconciliation-query-batch/valid/multi-values.json
@@ -1,10 +1,12 @@
{
"queries": [
{
- "query": "Christel Hanewinckel",
"type": "DifferentiatedPerson",
"limit": 5,
"properties": [
+ {
+ "v": "Christel Hanewinckel"
+ },
{
"pid": "professionOrOccupation",
"v": [
diff --git a/draft/examples/reconciliation-query-batch/valid/text-processing-language.json b/draft/examples/reconciliation-query-batch/valid/text-processing-language.json
index c16ce91..2f67c31 100644
--- a/draft/examples/reconciliation-query-batch/valid/text-processing-language.json
+++ b/draft/examples/reconciliation-query-batch/valid/text-processing-language.json
@@ -1,9 +1,11 @@
{
"queries": [
{
- "query": "Deng Shuping",
"lang": "en",
"properties": [
+ {
+ "v": "Deng Shuping"
+ },
{
"pid": "professionOrOccupation",
"v": "art historian"
diff --git a/draft/index.html b/draft/index.html
index 4fde551..9405e10 100644
--- a/draft/index.html
+++ b/draft/index.html
@@ -478,29 +478,35 @@ Reconciliation Queries
Structure of a Reconciliation Query
A reconciliation query consists of the following fields.
- At least one of query
or properties
must be supplied, but all other
- fields are optional.
- query
- - A query string, consisting of a non-empty string.
- By supplying such a string, a client intends to search for entities with similar
- names. The specifics of how this similarity is defined are determined by the service.
+ properties
+ - An array of property assignments (at least one).
type
- - A type identifier. Supplying such a type allows users to restrict
+
- An optional type identifier. Supplying such a type allows users to restrict
the search to entities which bear this type. Whether this restriction should be a
hard constraint or simply induce a change on the reconciliation scores can be
determined by the service. In particular, services MAY return candidates which
do not belong to the supplied type;
limit
- - A limit on the number of candidates to return, which must be a positive integer;
- properties
- - An array of objects, where each object maps a property identifier (in the
pid
field)
- to one or more property values (in the v
field). These are used to further filter the set of candidates (similar to a WHERE clause in SQL),
- by allowing clients to specify other attributes of entities that should match, beyond their name in the query
field.
- How reconciliation services handle this further restriction ("must match all properties" or "should match some") and how it affects the score, is up to the service.
- A reconciliation service that supports properties SHOULD provide a suggest service for discovering these properties;
+ - An optional limit on the number of candidates to return, which must be a positive integer;
+
+ A property assignment specifies the expected value of a property on the entities to match.
+ These are used to filter the set of candidates (similar to a WHERE clause in SQL),
+ by allowing clients to specify an attribute of entities that should match. It consists of:
+
+ pid
+ - A property identifier. If this is not provided, then this signals that
+ the client intends to search for entities with similar names. The specifics of how this similarity
+ is defined are determined by the service.
+ v
+ - one or more property values.
+
+
+ How reconciliation services handle these property assignments ("must match all properties" or "should match some"), and how they affect the score, is up to the service.
+ A reconciliation service that supports properties SHOULD provide a suggest service for discovering these properties.
+
A reconciliation query batch is an array of reconciliation queries.
diff --git a/draft/schemas/reconciliation-query-batch.json b/draft/schemas/reconciliation-query-batch.json
index b2c967e..a776908 100644
--- a/draft/schemas/reconciliation-query-batch.json
+++ b/draft/schemas/reconciliation-query-batch.json
@@ -43,10 +43,6 @@
"items": {
"type": "object",
"properties": {
- "query": {
- "type": "string",
- "description": "A string to be matched against the name of the entities"
- },
"type": {
"description": "A type identifier indicating which class of entities to restrict the search to",
"type": "string"
@@ -61,13 +57,14 @@
},
"properties": {
"type": "array",
- "description": "An optional list of property mappings to refine the query",
+ "minItems": 1,
+ "description": "A list of property mappings to select candidates",
"items": {
"type": "object",
"properties": {
"pid": {
"type": "string",
- "description": "The identifier of the property, whose values will be compared to the values supplied"
+ "description": "The identifier of the property, whose values will be compared to the values supplied. If absent, values will be matched against the entity names"
},
"v": {
"description": "A value (or array of values) to match against the property values associated with the property on each candidate",
@@ -85,7 +82,6 @@
}
},
"required": [
- "pid",
"v"
]
}
@@ -100,23 +96,8 @@
]
}
},
- "anyOf": [
- {
- "required": [
- "query"
- ]
- },
- {
- "required": [
- "properties"
- ],
- "properties": {
- "properties": {
- "type": "array",
- "minItems": 1
- }
- }
- }
+ "required": [
+ "properties"
],
"additionalProperties": false
}
From 68186b00f9340777c7a7bc526d7b1e22ff5908e2 Mon Sep 17 00:00:00 2001
From: Antonin Delpeuch
Date: Mon, 26 Feb 2024 12:10:32 +0100
Subject: [PATCH 2/3] Introduce match_type as suggested by Osma
---
...-properties.json => empty-conditions.json} | 2 +-
...property.json => misnamed-conditions.json} | 0
.../invalid/missing-match-type.json | 18 +++++++++++
.../invalid/multiple-types.json | 3 +-
.../invalid/no-root-object.json | 16 ++++++++--
.../valid/example-full.json | 10 +++++--
.../valid/example-min.json | 6 ++--
.../valid/multi-values.json | 4 ++-
.../valid/no-query-string.json | 30 ++++++++++++++-----
.../valid/text-processing-language.json | 5 +++-
draft/index.html | 18 ++++++-----
draft/schemas/reconciliation-query-batch.json | 21 +++++++++----
12 files changed, 101 insertions(+), 32 deletions(-)
rename draft/examples/reconciliation-query-batch/invalid/{empty-properties.json => empty-conditions.json} (60%)
rename draft/examples/reconciliation-query-batch/invalid/{misnamed-property.json => misnamed-conditions.json} (100%)
create mode 100644 draft/examples/reconciliation-query-batch/invalid/missing-match-type.json
diff --git a/draft/examples/reconciliation-query-batch/invalid/empty-properties.json b/draft/examples/reconciliation-query-batch/invalid/empty-conditions.json
similarity index 60%
rename from draft/examples/reconciliation-query-batch/invalid/empty-properties.json
rename to draft/examples/reconciliation-query-batch/invalid/empty-conditions.json
index c2c6072..e60930d 100644
--- a/draft/examples/reconciliation-query-batch/invalid/empty-properties.json
+++ b/draft/examples/reconciliation-query-batch/invalid/empty-conditions.json
@@ -1,7 +1,7 @@
{
"queries": [
{
- "properties": []
+ "conditions": []
}
]
}
diff --git a/draft/examples/reconciliation-query-batch/invalid/misnamed-property.json b/draft/examples/reconciliation-query-batch/invalid/misnamed-conditions.json
similarity index 100%
rename from draft/examples/reconciliation-query-batch/invalid/misnamed-property.json
rename to draft/examples/reconciliation-query-batch/invalid/misnamed-conditions.json
diff --git a/draft/examples/reconciliation-query-batch/invalid/missing-match-type.json b/draft/examples/reconciliation-query-batch/invalid/missing-match-type.json
new file mode 100644
index 0000000..843c25c
--- /dev/null
+++ b/draft/examples/reconciliation-query-batch/invalid/missing-match-type.json
@@ -0,0 +1,18 @@
+{
+ "queries": [
+ {
+ "conditions": [
+ {
+ "v": "Hans-Eberhard Urbaniak"
+ }
+ ]
+ },
+ {
+ "conditions": [
+ {
+ "v": "Ernst Schwanhold"
+ }
+ ]
+ }
+ ]
+}
diff --git a/draft/examples/reconciliation-query-batch/invalid/multiple-types.json b/draft/examples/reconciliation-query-batch/invalid/multiple-types.json
index ee00a4e..d2f3fb7 100644
--- a/draft/examples/reconciliation-query-batch/invalid/multiple-types.json
+++ b/draft/examples/reconciliation-query-batch/invalid/multiple-types.json
@@ -1,8 +1,9 @@
{
"queries": [
{
- "properties": [
+ "conditions": [
{
+ "match_type": "name",
"v": "Christel Hanewinckel"
}
],
diff --git a/draft/examples/reconciliation-query-batch/invalid/no-root-object.json b/draft/examples/reconciliation-query-batch/invalid/no-root-object.json
index 0435777..143d568 100644
--- a/draft/examples/reconciliation-query-batch/invalid/no-root-object.json
+++ b/draft/examples/reconciliation-query-batch/invalid/no-root-object.json
@@ -1,8 +1,20 @@
[
{
- "properties": [{"pid":"uid", "v": "27eb892afbb2"}]
+ "conditions": [
+ {
+ "match_type": "property",
+ "pid": "uid",
+ "v": "27eb892afbb2"
+ }
+ ]
},
{
- "properties": [{"pid":"uid", "v": "ab09da9dd37e"}]
+ "conditions": [
+ {
+ "match_type": "property",
+ "pid": "uid",
+ "v": "ab09da9dd37e"
+ }
+ ]
}
]
diff --git a/draft/examples/reconciliation-query-batch/valid/example-full.json b/draft/examples/reconciliation-query-batch/valid/example-full.json
index 2e20c2a..6c61a09 100644
--- a/draft/examples/reconciliation-query-batch/valid/example-full.json
+++ b/draft/examples/reconciliation-query-batch/valid/example-full.json
@@ -3,11 +3,13 @@
{
"type": "DifferentiatedPerson",
"limit": 5,
- "properties": [
+ "conditions": [
{
+ "match_type": "name",
"v": "Christel Hanewinckel"
},
{
+ "match_type": "property",
"pid": "professionOrOccupation",
"v": "Politik*",
"required": false,
@@ -15,6 +17,7 @@
"match_qualifier": "WildcardMatch"
},
{
+ "match_type": "property",
"pid": "affiliation",
"v": "http://d-nb.info/gnd/2022139-3",
"required": false,
@@ -26,11 +29,13 @@
{
"type": "DifferentiatedPerson",
"limit": 5,
- "properties": [
+ "conditions": [
{
+ "match_type": "name",
"v": "Franz Thönnes"
},
{
+ "match_type": "property",
"pid": "professionOrOccupation",
"v": "Politik*",
"required": false,
@@ -38,6 +43,7 @@
"match_qualifier": "WildcardMatch"
},
{
+ "match_type": "property",
"pid": "affiliation",
"v": "http://d-nb.info/gnd/2022139-3",
"required": false,
diff --git a/draft/examples/reconciliation-query-batch/valid/example-min.json b/draft/examples/reconciliation-query-batch/valid/example-min.json
index 785eda3..8d5d776 100644
--- a/draft/examples/reconciliation-query-batch/valid/example-min.json
+++ b/draft/examples/reconciliation-query-batch/valid/example-min.json
@@ -1,15 +1,17 @@
{
"queries": [
{
- "properties": [
+ "conditions": [
{
+ "match_type": "name",
"v": "Hans-Eberhard Urbaniak"
}
]
},
{
- "properties": [
+ "conditions": [
{
+ "match_type": "name",
"v": "Ernst Schwanhold"
}
]
diff --git a/draft/examples/reconciliation-query-batch/valid/multi-values.json b/draft/examples/reconciliation-query-batch/valid/multi-values.json
index caaa898..f4ba3b2 100644
--- a/draft/examples/reconciliation-query-batch/valid/multi-values.json
+++ b/draft/examples/reconciliation-query-batch/valid/multi-values.json
@@ -3,11 +3,13 @@
{
"type": "DifferentiatedPerson",
"limit": 5,
- "properties": [
+ "conditions": [
{
+ "match_type": "name",
"v": "Christel Hanewinckel"
},
{
+ "match_type": "property",
"pid": "professionOrOccupation",
"v": [
"Politik*",
diff --git a/draft/examples/reconciliation-query-batch/valid/no-query-string.json b/draft/examples/reconciliation-query-batch/valid/no-query-string.json
index eb027b4..ee78785 100644
--- a/draft/examples/reconciliation-query-batch/valid/no-query-string.json
+++ b/draft/examples/reconciliation-query-batch/valid/no-query-string.json
@@ -1,8 +1,22 @@
-{"queries":[
- {
- "properties": [{"pid":"uid", "v": "27eb892afbb2"}]
- },
- {
- "properties": [{"pid":"uid", "v": "ab09da9dd37e"}]
- }
-]}
+{
+ "queries": [
+ {
+ "conditions": [
+ {
+ "match_type": "property",
+ "pid": "uid",
+ "v": "27eb892afbb2"
+ }
+ ]
+ },
+ {
+ "conditions": [
+ {
+ "match_type": "property",
+ "pid": "uid",
+ "v": "ab09da9dd37e"
+ }
+ ]
+ }
+ ]
+}
diff --git a/draft/examples/reconciliation-query-batch/valid/text-processing-language.json b/draft/examples/reconciliation-query-batch/valid/text-processing-language.json
index 2f67c31..dda0f9b 100644
--- a/draft/examples/reconciliation-query-batch/valid/text-processing-language.json
+++ b/draft/examples/reconciliation-query-batch/valid/text-processing-language.json
@@ -2,15 +2,18 @@
"queries": [
{
"lang": "en",
- "properties": [
+ "conditions": [
{
+ "match_type": "name",
"v": "Deng Shuping"
},
{
+ "match_type": "property",
"pid": "professionOrOccupation",
"v": "art historian"
},
{
+ "match_type": "property",
"pid": "variantName",
"v": "鄧淑蘋",
"lang": "zh-Hant"
diff --git a/draft/index.html b/draft/index.html
index 19d0991..021f10b 100644
--- a/draft/index.html
+++ b/draft/index.html
@@ -482,8 +482,8 @@ Structure of a Reconciliation Query
A reconciliation query consists of the following fields.
- properties
- - An array of property assignments (at least one).
+ conditions
+ - An array of conditions (at least one).
type
- An optional type identifier. Supplying such a type allows users to restrict
the search to entities which bear this type. Whether this restriction should be a
@@ -495,16 +495,18 @@
Structure of a Reconciliation Query
- A property assignment specifies the expected value of a property on the entities to match.
- These are used to filter the set of candidates (similar to a WHERE clause in SQL),
+ A condition specifies a constraint that should be matched by the entities to return.
+ It is used to filter the set of candidates (similar to a WHERE clause in SQL),
by allowing clients to specify an attribute of entities that should match. It consists of:
+ match_type
+ - Either
name
or property
, depending on whether the condition related to entity names or their properties.
pid
- - A property identifier. If this is not provided, then this signals that
- the client intends to search for entities with similar names. The specifics of how this similarity
- is defined are determined by the service.
+ - A property identifier, to be provided if and only if the
match_type
is property
.
v
- - one or more property values.
+
- One or more values. If
match_type
is name
, then these values are to
+ be matched against entity names, otherwise against the values of the property identified by the supplied pid
. The specifics of how this matching
+ is performed are determined by the service.
required
- An optional boolean indicating if a match for the property is required for an entity to enter the list of candidates (i.e. acting like a filter or a WHERE clause in SQL)
or optional (i.e. only effecting the entity's rank in the list of candidates);
diff --git a/draft/schemas/reconciliation-query-batch.json b/draft/schemas/reconciliation-query-batch.json
index 03a4af6..7bfe4ae 100644
--- a/draft/schemas/reconciliation-query-batch.json
+++ b/draft/schemas/reconciliation-query-batch.json
@@ -55,19 +55,27 @@
"type": "string",
"description": "The text-processing language for the query"
},
- "properties": {
+ "conditions": {
"type": "array",
"minItems": 1,
- "description": "A list of property mappings to select candidates",
+ "description": "A list of conditions to select candidates",
"items": {
"type": "object",
"properties": {
+ "match_type": {
+ "type": "string",
+ "description": "A string to indicate whether to match the supplied value to entity names or property values",
+ "enum": [
+ "name",
+ "property"
+ ]
+ },
"pid": {
"type": "string",
- "description": "The identifier of the property, whose values will be compared to the values supplied. If absent, values will be matched against the entity names"
+ "description": "The identifier of the property, whose values will be compared to the values supplied. Must be provided if and only if 'match_type' is 'property'."
},
"v": {
- "description": "A value (or array of values) to match against the property values associated with the property on each candidate",
+ "description": "A value (or array of values) to match against the entity names or property values associated with the property on each candidate",
"oneOf": [
{
"$ref": "#/definitions/property_value"
@@ -82,7 +90,7 @@
},
"required": {
"type": "boolean",
- "description": "A boolean indicating if a match for the property is required for an entity to enter the list of candidates"
+ "description": "A boolean indicating if a match of this condition is required for an entity to enter the list of candidates"
},
"match_quantifier": {
"type": "string",
@@ -99,6 +107,7 @@
}
},
"required": [
+ "match_type",
"v"
]
}
@@ -114,7 +123,7 @@
}
},
"required": [
- "properties"
+ "conditions"
],
"additionalProperties": false
}
From a2b49095e87f1b4a3cf4ef5ce894adba02a7de0c Mon Sep 17 00:00:00 2001
From: Antonin Delpeuch
Date: Thu, 14 Mar 2024 13:47:02 +0100
Subject: [PATCH 3/3] Update draft/index.html
Co-authored-by: Fabian Steeg
---
draft/index.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/draft/index.html b/draft/index.html
index 021f10b..a9923b4 100644
--- a/draft/index.html
+++ b/draft/index.html
@@ -500,7 +500,7 @@ Structure of a Reconciliation Query
by allowing clients to specify an attribute of entities that should match. It consists of:
match_type
- - Either
name
or property
, depending on whether the condition related to entity names or their properties.
+ - Either
name
or property
, depending on whether the condition relates to entity names or their properties.
pid
- A property identifier, to be provided if and only if the
match_type
is property
.
v