From 6b0bdd433b1a6a29682b7e21c3d4addc79a4c1cd Mon Sep 17 00:00:00 2001 From: Nikos Date: Fri, 8 Sep 2023 12:41:45 +0300 Subject: [PATCH] CHAM-586 Allow only a limited set of special characters in YAML context --- openformats/formats/yaml/utils.py | 10 +++++++++- openformats/tests/formats/yaml/files/1_el.yml | 4 ++-- openformats/tests/formats/yaml/files/1_en.yml | 4 ++-- openformats/tests/formats/yaml/files/1_en_exported.yml | 4 ++-- .../yaml/files/1_en_exported_without_template.yml | 4 ++-- openformats/tests/formats/yaml/files/1_tpl.yml | 4 ++-- .../formats/yamlinternationalization/files/1_el.yml | 4 ++-- .../formats/yamlinternationalization/files/1_en.yml | 4 ++-- .../yamlinternationalization/files/1_en_exported.yml | 4 ++-- .../files/1_en_exported_without_template.yml | 4 ++-- .../formats/yamlinternationalization/files/1_tpl.yml | 4 ++-- 11 files changed, 29 insertions(+), 21 deletions(-) diff --git a/openformats/formats/yaml/utils.py b/openformats/formats/yaml/utils.py index ebacae0c..9d9852e0 100644 --- a/openformats/formats/yaml/utils.py +++ b/openformats/formats/yaml/utils.py @@ -142,14 +142,22 @@ def _is_custom_tag(self, tag): Detect custom tags, like: `foo: !bar test` `foo: !xml "Bar"` + + The name of the custom tag can have any of the following characters: + `a-z`, `A-Z`, `0-9`, `_`, `.`, `:`, `-`. + In any other case, we return `False`. + Built-in types, indicated by a `!!` prefix, will not be matched. We can't preserve the information whether a built-in tag like `!!str` was used for a value since the PyYAML library will tag such entries with the built-in identifier. For example `tag:yaml.org,2002:str`, not `!!str`. """ - return tag.startswith('!') and not tag.startswith('!!') + return re.match(ensure_unicode(r'^\![a-zA-Z0-9_:.\-]*$'), + tag, + re.IGNORECASE) + def construct_mapping(self, node, deep=True): """ Override `yaml.SafeLoader.construct_mapping` to return for each item diff --git a/openformats/tests/formats/yaml/files/1_el.yml b/openformats/tests/formats/yaml/files/1_el.yml index bfb5dc88..45e24e9b 100644 --- a/openformats/tests/formats/yaml/files/1_el.yml +++ b/openformats/tests/formats/yaml/files/1_el.yml @@ -78,8 +78,8 @@ number: !!int 123 # Should ignore bin: !!binary aGVsbG8= # Should ignore # Custom tags with numbers and symbols -context_string: !he:fd94;fd/la "el:context string" -verbim_context_string: !contex!t545qa "el:verbim context string" +context_string: !cs:fd-94_fd.dot. "el:context string" +verbim_context_string: !context:t5-46_qa "el:verbim context string" context_on_nested_map: first: !first_context:54KJFLA95KJ4 "el:context in nested map" second: !second_context:FDKJ40DK "el:context in nested map" diff --git a/openformats/tests/formats/yaml/files/1_en.yml b/openformats/tests/formats/yaml/files/1_en.yml index 18431611..b751cca8 100644 --- a/openformats/tests/formats/yaml/files/1_en.yml +++ b/openformats/tests/formats/yaml/files/1_en.yml @@ -81,8 +81,8 @@ number: !!int 123 # Should ignore bin: !!binary aGVsbG8= # Should ignore # Custom tags with numbers and symbols -context_string: !he:fd94;fd/la "context string" -verbim_context_string: ! "verbim context string" +context_string: !cs:fd-94_fd.dot. "context string" +verbim_context_string: ! "verbim context string" context_on_nested_map: first: !first_context:54KJFLA95KJ4 "context in nested map" second: !second_context:FDKJ40DK "context in nested map" diff --git a/openformats/tests/formats/yaml/files/1_en_exported.yml b/openformats/tests/formats/yaml/files/1_en_exported.yml index 10fa97ab..4e324640 100644 --- a/openformats/tests/formats/yaml/files/1_en_exported.yml +++ b/openformats/tests/formats/yaml/files/1_en_exported.yml @@ -78,8 +78,8 @@ number: !!int 123 # Should ignore bin: !!binary aGVsbG8= # Should ignore # Custom tags with numbers and symbols -context_string: !he:fd94;fd/la "context string" -verbim_context_string: !contex!t545qa "verbim context string" +context_string: !cs:fd-94_fd.dot. "context string" +verbim_context_string: !context:t5-46_qa "verbim context string" context_on_nested_map: first: !first_context:54KJFLA95KJ4 "context in nested map" second: !second_context:FDKJ40DK "context in nested map" diff --git a/openformats/tests/formats/yaml/files/1_en_exported_without_template.yml b/openformats/tests/formats/yaml/files/1_en_exported_without_template.yml index b9df1d14..2a21c55f 100644 --- a/openformats/tests/formats/yaml/files/1_en_exported_without_template.yml +++ b/openformats/tests/formats/yaml/files/1_en_exported_without_template.yml @@ -39,8 +39,8 @@ alias_key: foo: !test 'bar' bar: !xml "foo bar" hello: World -context_string: !he:fd94;fd/la "context string" -verbim_context_string: !contex%21t545qa "verbim context string" +context_string: !cs:fd-94_fd.dot. "context string" +verbim_context_string: !context:t5-46_qa "verbim context string" context_on_nested_map: first: "context in nested map" second: "context in nested map" diff --git a/openformats/tests/formats/yaml/files/1_tpl.yml b/openformats/tests/formats/yaml/files/1_tpl.yml index 3c6e96b5..0a5efd5d 100644 --- a/openformats/tests/formats/yaml/files/1_tpl.yml +++ b/openformats/tests/formats/yaml/files/1_tpl.yml @@ -72,8 +72,8 @@ number: !!int 123 # Should ignore bin: !!binary aGVsbG8= # Should ignore # Custom tags with numbers and symbols -context_string: 629aded197db84c4d323cd3ed4cf0485_tr -verbim_context_string: d37275706209cb19ee621ff3835522fe_tr +context_string: 17d854ee46fe3d3bc91fadb4cf4df426_tr +verbim_context_string: 0a3c1ae205b2c0d09ab69ab540e481f2_tr context_on_nested_map: first: 95175e30d6fbfe0f658e75919cd4982c_tr second: dce391c231a7ad7fef9e6cc0ddcbf549_tr diff --git a/openformats/tests/formats/yamlinternationalization/files/1_el.yml b/openformats/tests/formats/yamlinternationalization/files/1_el.yml index 73116b28..7f45b6d8 100644 --- a/openformats/tests/formats/yamlinternationalization/files/1_el.yml +++ b/openformats/tests/formats/yamlinternationalization/files/1_el.yml @@ -102,8 +102,8 @@ en: other: el:other # context - context_string: !he:fd94;fd/la "el:context string" - verbim_context_string: !contex!t545qa "el:verbim context string" + context_string: !cs:fd-94_fd.dot. "el:context string" + verbim_context_string: !context:t5-46_qa "el:verbim context string" context_on_nested_map: first: !first_context:54KJFLA95KJ4 "el:context in nested map" second: !second_context:FDKJ40DK "el:context in nested map" diff --git a/openformats/tests/formats/yamlinternationalization/files/1_en.yml b/openformats/tests/formats/yamlinternationalization/files/1_en.yml index 361c8663..deb2a70e 100644 --- a/openformats/tests/formats/yamlinternationalization/files/1_en.yml +++ b/openformats/tests/formats/yamlinternationalization/files/1_en.yml @@ -106,8 +106,8 @@ en: other: other # context - context_string: !he:fd94;fd/la "context string" - verbim_context_string: ! "verbim context string" + context_string: !cs:fd-94_fd.dot. "context string" + verbim_context_string: ! "verbim context string" context_on_nested_map: first: !first_context:54KJFLA95KJ4 "context in nested map" second: !second_context:FDKJ40DK "context in nested map" diff --git a/openformats/tests/formats/yamlinternationalization/files/1_en_exported.yml b/openformats/tests/formats/yamlinternationalization/files/1_en_exported.yml index 97d22440..d603d85e 100644 --- a/openformats/tests/formats/yamlinternationalization/files/1_en_exported.yml +++ b/openformats/tests/formats/yamlinternationalization/files/1_en_exported.yml @@ -102,8 +102,8 @@ en: other: other # context - context_string: !he:fd94;fd/la "context string" - verbim_context_string: !contex!t545qa "verbim context string" + context_string: !cs:fd-94_fd.dot. "context string" + verbim_context_string: !context:t5-46_qa "verbim context string" context_on_nested_map: first: !first_context:54KJFLA95KJ4 "context in nested map" second: !second_context:FDKJ40DK "context in nested map" diff --git a/openformats/tests/formats/yamlinternationalization/files/1_en_exported_without_template.yml b/openformats/tests/formats/yamlinternationalization/files/1_en_exported_without_template.yml index 6726558e..1595af46 100644 --- a/openformats/tests/formats/yamlinternationalization/files/1_en_exported_without_template.yml +++ b/openformats/tests/formats/yamlinternationalization/files/1_en_exported_without_template.yml @@ -59,8 +59,8 @@ en: anchor_mapping: one: one other: other - context_string: !he:fd94;fd/la "context string" - verbim_context_string: !contex%21t545qa "verbim context string" + context_string: !cs:fd-94_fd.dot. "context string" + verbim_context_string: !context:t5-46_qa "verbim context string" context_on_nested_map: first: "context in nested map" second: "context in nested map" diff --git a/openformats/tests/formats/yamlinternationalization/files/1_tpl.yml b/openformats/tests/formats/yamlinternationalization/files/1_tpl.yml index b9484b76..85943df5 100644 --- a/openformats/tests/formats/yamlinternationalization/files/1_tpl.yml +++ b/openformats/tests/formats/yamlinternationalization/files/1_tpl.yml @@ -92,8 +92,8 @@ en: 798cf4a4e275a90e80b0aac837f06793_pl # context - context_string: 629aded197db84c4d323cd3ed4cf0485_tr - verbim_context_string: d37275706209cb19ee621ff3835522fe_tr + context_string: 17d854ee46fe3d3bc91fadb4cf4df426_tr + verbim_context_string: 0a3c1ae205b2c0d09ab69ab540e481f2_tr context_on_nested_map: first: 95175e30d6fbfe0f658e75919cd4982c_tr second: dce391c231a7ad7fef9e6cc0ddcbf549_tr