Skip to content

Commit

Permalink
CHAM-586 Allow only a limited set of special characters in YAML context
Browse files Browse the repository at this point in the history
  • Loading branch information
karampitsos committed Sep 12, 2023
1 parent 01753fe commit 6b0bdd4
Show file tree
Hide file tree
Showing 11 changed files with 29 additions and 21 deletions.
10 changes: 9 additions & 1 deletion openformats/formats/yaml/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,14 +142,22 @@ def _is_custom_tag(self, tag):
Detect custom tags, like:
`foo: !bar test`
`foo: !xml "<bar>Bar</bar>"`
The name of the custom tag may contain only the following characters:
`a-z`, `A-Z`, `0-9`, `_`, `.`, `:`, `-`.
For any other tag no match is returned (`None`, which is falsy).
Built-in types, indicated by a `!!` prefix, will not be matched. We
can't preserve whether a built-in tag like `!!str` was used for a
value, since the PyYAML library tags such entries with the built-in
identifier (for example `tag:yaml.org,2002:str`) rather than `!!str`.
"""
return tag.startswith('!') and not tag.startswith('!!')

return re.match(ensure_unicode(r'^\![a-zA-Z0-9_:.\-]*$'),
tag,
re.IGNORECASE)

def construct_mapping(self, node, deep=True):
"""
Override `yaml.SafeLoader.construct_mapping` to return for each item
Expand Down
4 changes: 2 additions & 2 deletions openformats/tests/formats/yaml/files/1_el.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ number: !!int 123 # Should ignore
bin: !!binary aGVsbG8= # Should ignore

# Custom tags with numbers and symbols
context_string: !he:fd94;fd/la "el:context string"
verbim_context_string: !contex!t545qa "el:verbim context string"
context_string: !cs:fd-94_fd.dot. "el:context string"
verbim_context_string: !context:t5-46_qa "el:verbim context string"
context_on_nested_map:
first: !first_context:54KJFLA95KJ4 "el:context in nested map"
second: !second_context:FDKJ40DK "el:context in nested map"
Expand Down
4 changes: 2 additions & 2 deletions openformats/tests/formats/yaml/files/1_en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ number: !!int 123 # Should ignore
bin: !!binary aGVsbG8= # Should ignore

# Custom tags with numbers and symbols
context_string: !he:fd94;fd/la "context string"
verbim_context_string: !<!contex!t545qa> "verbim context string"
context_string: !cs:fd-94_fd.dot. "context string"
verbim_context_string: !<!context:t5-46_qa> "verbim context string"
context_on_nested_map:
first: !first_context:54KJFLA95KJ4 "context in nested map"
second: !second_context:FDKJ40DK "context in nested map"
Expand Down
4 changes: 2 additions & 2 deletions openformats/tests/formats/yaml/files/1_en_exported.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ number: !!int 123 # Should ignore
bin: !!binary aGVsbG8= # Should ignore

# Custom tags with numbers and symbols
context_string: !he:fd94;fd/la "context string"
verbim_context_string: !contex!t545qa "verbim context string"
context_string: !cs:fd-94_fd.dot. "context string"
verbim_context_string: !context:t5-46_qa "verbim context string"
context_on_nested_map:
first: !first_context:54KJFLA95KJ4 "context in nested map"
second: !second_context:FDKJ40DK "context in nested map"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ alias_key:
foo: !test 'bar'
bar: !xml "foo <xml>bar</xml>"
hello: World
context_string: !he:fd94;fd/la "context string"
verbim_context_string: !contex%21t545qa "verbim context string"
context_string: !cs:fd-94_fd.dot. "context string"
verbim_context_string: !context:t5-46_qa "verbim context string"
context_on_nested_map:
first: "context in nested map"
second: "context in nested map"
Expand Down
4 changes: 2 additions & 2 deletions openformats/tests/formats/yaml/files/1_tpl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ number: !!int 123 # Should ignore
bin: !!binary aGVsbG8= # Should ignore

# Custom tags with numbers and symbols
context_string: 629aded197db84c4d323cd3ed4cf0485_tr
verbim_context_string: d37275706209cb19ee621ff3835522fe_tr
context_string: 17d854ee46fe3d3bc91fadb4cf4df426_tr
verbim_context_string: 0a3c1ae205b2c0d09ab69ab540e481f2_tr
context_on_nested_map:
first: 95175e30d6fbfe0f658e75919cd4982c_tr
second: dce391c231a7ad7fef9e6cc0ddcbf549_tr
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ en:
other: el:other

# context
context_string: !he:fd94;fd/la "el:context string"
verbim_context_string: !contex!t545qa "el:verbim context string"
context_string: !cs:fd-94_fd.dot. "el:context string"
verbim_context_string: !context:t5-46_qa "el:verbim context string"
context_on_nested_map:
first: !first_context:54KJFLA95KJ4 "el:context in nested map"
second: !second_context:FDKJ40DK "el:context in nested map"
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ en:
other: other

# context
context_string: !he:fd94;fd/la "context string"
verbim_context_string: !<!contex!t545qa> "verbim context string"
context_string: !cs:fd-94_fd.dot. "context string"
verbim_context_string: !<!context:t5-46_qa> "verbim context string"
context_on_nested_map:
first: !first_context:54KJFLA95KJ4 "context in nested map"
second: !second_context:FDKJ40DK "context in nested map"
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ en:
other: other

# context
context_string: !he:fd94;fd/la "context string"
verbim_context_string: !contex!t545qa "verbim context string"
context_string: !cs:fd-94_fd.dot. "context string"
verbim_context_string: !context:t5-46_qa "verbim context string"
context_on_nested_map:
first: !first_context:54KJFLA95KJ4 "context in nested map"
second: !second_context:FDKJ40DK "context in nested map"
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ en:
anchor_mapping:
one: one
other: other
context_string: !he:fd94;fd/la "context string"
verbim_context_string: !contex%21t545qa "verbim context string"
context_string: !cs:fd-94_fd.dot. "context string"
verbim_context_string: !context:t5-46_qa "verbim context string"
context_on_nested_map:
first: "context in nested map"
second: "context in nested map"
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ en:
798cf4a4e275a90e80b0aac837f06793_pl

# context
context_string: 629aded197db84c4d323cd3ed4cf0485_tr
verbim_context_string: d37275706209cb19ee621ff3835522fe_tr
context_string: 17d854ee46fe3d3bc91fadb4cf4df426_tr
verbim_context_string: 0a3c1ae205b2c0d09ab69ab540e481f2_tr
context_on_nested_map:
first: 95175e30d6fbfe0f658e75919cd4982c_tr
second: dce391c231a7ad7fef9e6cc0ddcbf549_tr

0 comments on commit 6b0bdd4

Please sign in to comment.