Skip to content

Commit

Permalink
Fix Android XML v3 escaping
Browse files Browse the repository at this point in the history
  • Loading branch information
arisktfx committed Feb 5, 2024
1 parent 55ae51d commit 38ac98f
Show file tree
Hide file tree
Showing 2 changed files with 153 additions and 11 deletions.
127 changes: 125 additions & 2 deletions openformats/formats/android_unescaped.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,142 @@
import re
from hashlib import md5
from openformats.formats.android import AndroidHandler
from ..utils.xml import NewDumbXml as DumbXml
from ..utils.xmlutils import XMLUtils
from ..strings import OpenString


class AndroidUnescapedHandler(AndroidHandler):
def _create_string(self, name, text, comment, product, child, pluralized=False):
    """Build an OpenString for a parsed tag, escaping its quotes first.

    :param name: The name of the string.
    :param text: The string's text; it is passed through
        `_escape_quotes` before being stored.
    :param comment: The developer's comment the string might have.
    :param product: Extra context for the string.
    :param child: The tag the string is created from. Used to find line
        numbers when errors occur.
    :param pluralized: Forwarded unchanged to the OpenString.
    :returns: An OpenString object, or None when the text does not pass
        the non-empty validation.
    """
    has_content = XMLUtils.validate_not_empty_string(
        self.transcriber,
        text,
        child,
        error_context={"main_tag": "plural", "child_tag": "item"},
    )
    if not has_content:
        return None

    text = self._escape_quotes(text)

    if (name, product) in self.existing_hashes:
        if child.tag not in self.existing_hashes[(name, product)]:
            # Same name/product seen under a different tag: fold the tag
            # into the context so the two strings stay distinguishable.
            product += child.tag
        else:
            error_details = {"name": name, "child_tag": child.tag}
            if product:
                message = (
                    "Duplicate `tag_name` ({child_tag}) for `name`"
                    " ({name}) and `product` ({product}) "
                    "found on line {line_number}"
                )
                error_details["product"] = product
            else:
                message = (
                    "Duplicate `tag_name` ({child_tag}) for `name`"
                    " ({name}) specify a product to differentiate"
                )
            XMLUtils.raise_error(
                self.transcriber, child, message, context=error_details
            )

    open_string = OpenString(
        name,
        text,
        context=product,
        order=next(self.order_counter),
        developer_comment=comment,
        pluralized=pluralized,
    )
    # Remember which tags were seen for this (name, context) pair; note that
    # `product` may have been extended with the tag name above.
    self.existing_hashes.setdefault((name, product), []).append(child.tag)
    return open_string

def _escape_quotes(self, text):
    """Escape single and double quotes in a string or a plural mapping.

    Quotes are allowed to be uploaded unescaped, but they must be returned
    escaped.

    :param text: Either a single string, or a dict whose values are the
        strings of a pluralized entry.
    :returns: The escaped string, or a new dict with every value escaped.
    """
    # isinstance (rather than an exact type comparison) so that dict
    # subclasses are also treated as plural mappings.
    if isinstance(text, dict):
        return AndroidUnescapedHandler._escape_quotes_plural_string(text)
    return AndroidUnescapedHandler._escape_quotes_simple_string(text)

@staticmethod
def _escape_quotes_simple_string(text):
    """Backslash-escape quotes in a single string, skipping inline tags.

    Every single or double quote that is not already preceded by a
    backslash gets `DumbXml.BACKSLASH` put in front of it. Inline tags are
    swapped for placeholders beforehand and restored afterwards, so quotes
    inside them are left alone.

    :param text: The string to escape.
    :returns: The escaped string.
    """
    masked, placeholders = AndroidUnescapedHandler._protect_inline_tags(text)

    # (pattern, replacement) pairs, applied in order: single quotes first,
    # then double quotes.
    substitutions = (
        (r"(?<!\\)'", DumbXml.BACKSLASH + DumbXml.SINGLE_QUOTE),
        (r'(?<!\\)"', DumbXml.BACKSLASH + DumbXml.DOUBLE_QUOTES),
    )
    for pattern, replacement in substitutions:
        masked = re.sub(pattern, replacement, masked)

    return AndroidUnescapedHandler._unprotect_inline_tags(masked, placeholders)

@staticmethod
def _escape_quotes_plural_string(text):
    """Escape quotes in every value of a plural mapping.

    :param text: A dict whose values are the strings of a pluralized entry.
    :returns: A new dict with the same keys and each value run through
        `_escape_quotes_simple_string`; the input dict is not modified.
    """
    return {
        plural_key: AndroidUnescapedHandler._escape_quotes_simple_string(form)
        for plural_key, form in text.items()
    }

@staticmethod
def _protect_inline_tags(text):
    """Replace inline tags with md5 placeholders so they escape untouched.

    The text is wrapped in a throwaway `<x>` element and parsed; each
    direct child whose tag is listed in `AndroidHandler.INLINE_TAGS` has
    its source replaced in `text` by the md5 hex digest of that source.

    :param text: The string that may contain inline tags.
    :returns: A `(text, protected_tags)` tuple, where `protected_tags`
        maps each placeholder digest to the tag source it stands for.
    """
    placeholders = {}
    root = DumbXml(f"<x>{text}</x>")

    for node in root.find_children():
        if node.tag not in AndroidHandler.INLINE_TAGS:
            continue
        fragment = node.source[node.start : node.end]
        digest = md5(fragment.encode("utf-8")).hexdigest()
        placeholders[digest] = fragment
        text = text.replace(fragment, digest)

    return text, placeholders

@staticmethod
def _unprotect_inline_tags(text, protected_tags):
for string_hash, string in protected_tags.items():
text = text.replace(string_hash, string)

return text

@staticmethod
def escape(string):
    """Escape a string for output while leaving inline tags untouched.

    Inline tags are swapped for placeholders, the remaining text is run
    through `AndroidHandler.escape` and then has `&`, `<`, `>` turned into
    XML entities and newline, tab, `@` and `?` prefixed with a backslash;
    finally the inline tags are restored verbatim.

    :param string: The string to escape.
    :returns: The escaped string with its inline tags unchanged.
    """
    string, protected_tags = AndroidUnescapedHandler._protect_inline_tags(string)
    string = AndroidHandler.escape(string)
    # "&" must be replaced first so the entities produced by the following
    # replacements are not escaped a second time.
    string = (
        string.replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace("\n", "\\n")
        .replace("\t", "\\t")
        .replace("@", "\\@")
        .replace("?", "\\?")
    )
    return AndroidUnescapedHandler._unprotect_inline_tags(string, protected_tags)

@staticmethod
def unescape(string):
Expand Down
37 changes: 28 additions & 9 deletions openformats/tests/formats/android/test_android_unescaped.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import unittest
from openformats.formats.android_unescaped import AndroidUnescapedHandler
from openformats.tests.formats.android.test_android import AndroidTestCase
from openformats.tests.formats.common import CommonFormatTestMixin
from openformats.tests.utils.strings import (
generate_random_string,
Expand All @@ -10,6 +11,15 @@
from openformats.strings import OpenString


class AndroidUnescapedFromAndroidTestCase(AndroidTestCase):
    """Run the tests inherited from AndroidTestCase with AndroidUnescapedHandler."""

    HANDLER_CLASS = AndroidUnescapedHandler
    TESTFILE_BASE = "openformats/tests/formats/android/files"

    def setUp(self):
        # Let the parent prepare its fixtures, then swap in the unescaped
        # handler so the inherited tests exercise it.
        super().setUp()
        self.handler = AndroidUnescapedHandler()


class AndroidUnescapedTestCase(CommonFormatTestMixin, unittest.TestCase):
HANDLER_CLASS = AndroidUnescapedHandler
TESTFILE_BASE = "openformats/tests/formats/android/files"
Expand All @@ -19,31 +29,40 @@ def setUp(self):
self.handler = AndroidUnescapedHandler()

def test_string(self):
    """Parse a string with unescaped quotes and compile it back.

    The uploaded source carries bare single and double quotes; the parsed
    OpenString and the compiled output are expected to carry them
    backslash-escaped, with the inline xliff tag left as it is.
    """
    self.maxDiff = None
    random_key = generate_random_string()
    # Backslashes before "@" and "?" are doubled so the literals contain a
    # real backslash without relying on invalid escape sequences.
    uploaded_string = (
        '&amp; &lt; &gt; \' \n \t \\@ \\? " <xliff:g id="1">%1$s</xliff:g>'
    )
    stored_string = (
        '&amp; &lt; &gt; \\\' \n \t \\@ \\? \\" <xliff:g id="1">%1$s</xliff:g>'
    )
    stored_openstring = OpenString(random_key, stored_string, order=0)
    random_hash = stored_openstring.template_replacement

    source_python_template = """
<resources>
<string name="{key}">{string}</string>
</resources>
"""
    source = source_python_template.format(key=random_key, string=uploaded_string)
    stored_source = source_python_template.format(
        key=random_key, string=stored_string
    )

    template, stringset = self.handler.parse(source)
    compiled = self.handler.compile(template, [stored_openstring])

    self.assertEqual(
        template, source_python_template.format(key=random_key, string=random_hash)
    )
    self.assertEqual(len(stringset), 1)
    self.assertEqual(stringset[0].__dict__, stored_openstring.__dict__)
    self.assertEqual(compiled, stored_source)

def test_escape(self):
rich = "&<>'\n\t@?" + '"'
raw = "&amp;&lt;&gt;\\'\\n\\t\\@\\?" + '\\"'
rich = '&>"\n\t@? <xliff:g id="1">%1$s &</xliff:g>'
raw = '&amp;&gt;\\"\\n\\t\\@\\? <xliff:g id="1">%1$s &</xliff:g>'

self.assertEqual(
AndroidUnescapedHandler.escape(rich),
Expand Down

0 comments on commit 38ac98f

Please sign in to comment.