Skip to content

Commit

Permalink
Merge pull request #338 from dsavinov-actionengine/support_arb
Browse files Browse the repository at this point in the history
Adding support for ARB (Application Resource Bundle) (.arb) format
  • Loading branch information
kbairak authored Jun 25, 2024
2 parents 80af7c5 + 7941dd7 commit 385d505
Show file tree
Hide file tree
Showing 8 changed files with 727 additions and 16 deletions.
1 change: 1 addition & 0 deletions bin/create_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def get_handler(ext):
'vtt': vtt.VttHandler(),
'xml': android.AndroidHandler(),
'json': json.JsonHandler(),
'arb': json.ArbHandler(),
'po': po.PoHandler(),
'md': github_markdown_v2.GithubMarkdownHandlerV2(),
}[ext]
Expand Down
187 changes: 171 additions & 16 deletions openformats/formats/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def _extract(self, parsed, nest=None):
for key, key_position, value, value_position in parsed:
key = self._escape_key(key)
if nest is not None:
key = u"{}.{}".format(nest, key)
key = f"{nest}.{key}"

# 'key' should be unique
if key in self.existing_keys:
Expand Down Expand Up @@ -130,9 +130,9 @@ def _extract(self, parsed, nest=None):
elif parsed.type == list:
for index, (item, item_position) in enumerate(parsed):
if nest is None:
key = u"..{}..".format(index)
key = f"..{index}.."
else:
key = u"{}..{}..".format(nest, index)
key = f"{nest}..{index}.."
if isinstance(item, (six.binary_type, six.text_type)):
if not item.strip():
continue
Expand Down Expand Up @@ -162,7 +162,7 @@ def _create_openstring(self, key, value, value_position):
# e.g. a pluralized string.
# If it cannot be parsed that way (returns None), parse it like
# a regular string.
parser = ICUParser(allow_numeric_plural_values=False)
parser = ICUParser(allow_numeric_plural_values = False)
icu_string = parser.parse(key, value)
if icu_string:
return self._create_pluralized_string(icu_string, value_position)
Expand All @@ -171,7 +171,8 @@ def _create_openstring(self, key, value, value_position):
key, value, value_position
)

def _create_pluralized_string(self, icu_string, value_position):
def _create_pluralized_string(self, icu_string, value_position,
context_value="", description_value=""):
"""Create a pluralized string based on the given information.
Also updates the transcriber accordingly.
Expand All @@ -185,6 +186,8 @@ def _create_pluralized_string(self, icu_string, value_position):
icu_string.strings_by_rule,
pluralized=icu_string.pluralized,
order=next(self._order),
context=context_value,
developer_comment=description_value,
)

current_pos = icu_string.current_position
Expand All @@ -196,15 +199,19 @@ def _create_pluralized_string(self, icu_string, value_position):

return openstring

def _create_regular_string(self, key, value, value_position):
def _create_regular_string(self, key, value, value_position,
context_value="", description_value=""):
"""Return a new simple OpenString based on the given key and value
and update the transcriber accordingly.
:param key: the string key
:param value: the translation string
:return: an OpenString or None
"""
openstring = OpenString(key, value, order=next(self._order))
openstring = OpenString(key, value, order=next(self._order),
context=context_value,
developer_comment=description_value,
)
self.transcriber.copy_until(value_position)
self.transcriber.add(openstring.template_replacement)
self.transcriber.skip(len(value))
Expand Down Expand Up @@ -472,6 +479,156 @@ def unescape(string):
return unescape(string)


class ArbHandler(JsonHandler):
    """Handler for ARB (Application Resource Bundle, ``.arb``) files.

    An ARB file is a JSON object. Root-level entries whose key starts with
    ``@`` are never extracted as strings. The ``context`` and ``description``
    values found under an ``@<key>`` object are recorded and attached to the
    string stored under ``<key>``; a ``type`` value other than ``"text"``
    under ``@<key>`` excludes ``<key>`` from extraction.
    """

    name = "ARB"
    extension = "arb"
    keep_sections = True

    # Locates the value of the "@@locale" entry; group 2 is the locale code.
    # Digits and hyphens are accepted so that codes such as "es_419" or
    # "zh-Hans" are matched and replaced, not only purely alphabetic ones.
    _LOCALE_RE = re.compile(r'("@@locale"\s*:\s*")([A-Za-z0-9_-]*)"')

    def parse(self, content, **kwargs):
        """Parse ARB ``content`` into a template and a list of strings.

        :param content: the raw content of the ARB file
        :return: a tuple of ``(template, stringset)``
        :raises ParseError: if the content cannot be parsed, its root is not
            a JSON object, a key is duplicated or no string was extracted
        """
        # Validate that content is JSON
        self.validate_content(content)

        self.transcriber = Transcriber(content)
        source = self.transcriber.source
        self.stringset = []
        self.existing_keys = set()
        self.metadata = {}

        try:
            parsed = DumbJson(source)
        except ValueError as e:
            raise ParseError(six.text_type(e)) from e
        if parsed.type != dict:
            raise ParseError("Invalid JSON")

        self._order = count()
        # First pass collects keys and metadata, second pass builds strings.
        self._find_keys(parsed)
        self._extract(parsed)

        if not self.stringset:
            raise ParseError('No strings could be extracted')

        self.transcriber.copy_until(len(source))

        return self.transcriber.get_destination(), self.stringset

    def _find_keys(self, parsed, nest=None):
        """Walk ``parsed`` recursively, recording keys and ARB metadata.

        Fills ``self.existing_keys`` with every root-level key and with
        nested ``@....type`` keys whose value is not ``"text"``, and fills
        ``self.metadata`` with nested ``@....context`` and
        ``@....description`` values.

        :param parsed: the DumbJson object to iterate over
        :param nest: the dotted key of the enclosing object, ``None`` at root
        :raises ParseError: if a key is already in ``self.existing_keys``
        """
        for key, key_position, value, _ in parsed:
            key = self._escape_key(key)
            if nest is not None:
                key = f"{nest}.{key}"

            # 'key' should be unique
            if key in self.existing_keys:
                # Need this for line number
                self.transcriber.copy_until(key_position)
                raise ParseError(
                    "Duplicate string key ('{}') in line {}".format(
                        key, self.transcriber.line_number
                    )
                )

            if nest is None:
                # Store all root-level keys in order to detect duplication
                self.existing_keys.add(key)
            elif key.startswith("@"):
                if key.endswith(".type") and value != "text":
                    # Remembered so that `_extract` skips the related string
                    self.existing_keys.add(key)
                elif key.endswith((".context", ".description")):
                    self.metadata[key] = value

            if isinstance(value, DumbJson):
                self._find_keys(value, key)

    def _extract(self, parsed):
        """Create an OpenString for every translatable root-level entry.

        Skipped entries: keys starting with ``@``, values that are not text
        (bools, nulls, numbers, nested objects), blank strings, and keys for
        which a non-"text" ``@<key>.type`` was recorded by `_find_keys`.

        :param parsed: the root DumbJson object
        """
        for key, _, value, value_position in parsed:
            key = self._escape_key(key)

            if key.startswith("@"):
                continue
            if not isinstance(value, six.text_type):
                # Ignore other JSON types (bools, nulls, numbers)
                continue
            if not value.strip():
                continue
            if f"@{key}.type" in self.existing_keys:
                continue

            context_value = self.metadata.get(f"@{key}.context", "")
            description_value = self.metadata.get(f"@{key}.description", "")

            openstring = self._create_openstring(
                key, value, value_position, context_value, description_value
            )
            if openstring:
                self.stringset.append(openstring)

    def _create_openstring(self, key, value, value_position,
                           context_value="", description_value=""):
        """Return a pluralized or regular OpenString for the given entry.

        The value is first parsed as an ICU string with numeric plural
        values allowed; if that yields nothing it is handled as a regular
        string.

        :param key: the string key
        :param value: the translation string
        :param value_position: the position of the value in the source
        :param context_value: passed on as the string's context
        :param description_value: passed on as the string's developer comment
        :return: whatever the pluralized/regular string factory returns
        """
        parser = ICUParser(allow_numeric_plural_values=True)
        icu_string = parser.parse(key, value)
        if icu_string:
            return self._create_pluralized_string(
                icu_string, value_position, context_value, description_value
            )

        return self._create_regular_string(
            key, value, value_position, context_value, description_value
        )

    def compile(self, template, stringset, language_info=None, **kwargs):
        """Compile ``template`` against ``stringset``.

        :param template: the template produced by `parse`
        :param stringset: an iterable of OpenStrings holding translations
        :param language_info: optional mapping; when given, its ``"code"``
            entry replaces the value of the ``"@@locale"`` entry, if present
        :param kwargs: ``keep_sections`` (default ``True``) is stored on the
            handler and read by `_copy_until_and_remove_section`
        :return: the result of the final `_replace_translations` call
        """
        # Let's work on the template first: we need it to not include the
        # hashes that aren't in the stringset. For that we create a new
        # stringset which has the hashes themselves as strings and compile
        # against that. The compilation process will remove any string
        # sections that are absent from the stringset. Next we call
        # `_clean_empties` on the template to clear out any `..., ,...` or
        # `...{ ,...` sequences left. The result is used as the actual
        # template for the compilation process.
        self.keep_sections = kwargs.get('keep_sections', True)

        stringset = list(stringset)

        fake_stringset = [
            OpenString(openstring.key,
                       openstring.template_replacement,
                       order=openstring.order,
                       pluralized=openstring.pluralized)
            for openstring in stringset
        ]
        new_template = self._replace_translations(
            template, fake_stringset, False
        )
        new_template = self._clean_empties(new_template)

        if language_info is not None:
            match = self._LOCALE_RE.search(new_template)
            if match:
                # Swap only the locale code, keeping the surrounding text
                new_template = "{}{}{}".format(
                    new_template[:match.start(2)],
                    language_info["code"],
                    new_template[match.end(2):]
                )

        return self._replace_translations(new_template, stringset, True)

    def _copy_until_and_remove_section(self, pos):
        """
        Copy characters to the transcriber until the given position,
        then end the current section.
        """
        self.transcriber.copy_until(pos)
        self.transcriber.mark_section_end()
        # Unlike the JSON format, do not remove the remaining section of the
        # template, unless `keep_sections` was turned off (needed for a test)
        if not self.keep_sections:
            self.transcriber.remove_section()

class StructuredJsonHandler(JsonHandler):
"""Handler that preserves certain keys for internal usage, while
keeping the flexibility and functionality of the original JsonHandler. It
Expand Down Expand Up @@ -534,16 +691,15 @@ def _compile_value(self, value, template_value, value_position, skip=False):
self.transcriber.add(u"null")
else:
if template_value is None:
self.transcriber.add(u"\"{}\"".format(value))
self.transcriber.add(f"\"{value}\"")
else:
self.transcriber.add(u"{}".format(value))
self.transcriber.add(f"{value}")
else:
self.transcriber.add(u"null")

self.transcriber.skip(len(u"{}".format(template_value)))
self.transcriber.skip(len(f"{template_value}"))
self.transcriber.copy_until(value_position +
len(u"{}".format(template_value)) +
1)
len(f"{template_value}") + 1)

def _compile_recursively(self, current_part):
if isinstance(current_part, DumbJson):
Expand Down Expand Up @@ -623,7 +779,7 @@ def _compile_recursively(self, current_part):
value_position)
elif not isinstance(value, DumbJson):
self.transcriber.copy_until(
value_position + len(u"{}".format(value)) + 1
value_position + len(f"{value}") + 1
)

extra_elements = []
Expand Down Expand Up @@ -765,7 +921,7 @@ def _parse_key(self, key):
"""
# We need to parse only STRING_KEY keys, otherwise we should
# early return
if not key.endswith(".{}".format(self.STRING_KEY)):
if not key.endswith(f".{self.STRING_KEY}"):
return None
# Remove the STRING_KEY part of the key as it is not needed. Add +1
# when calculating the length of the STRING_KEY, for the "." character
Expand Down Expand Up @@ -1004,8 +1160,7 @@ def parse(self, content, **kwargs):
outer_key_position, outer_value, outer_value_position) in parsed:
if outer_key in existing_keys:
transcriber.copy_until(outer_key_position)
raise ParseError(u"Key '{}' appears multiple times (line {})".
format(outer_key, transcriber.line_number))
raise ParseError(f"Key '{outer_key}' appears multiple times (line {transcriber.line_number})")
existing_keys.add(outer_key)

if not isinstance(outer_value, DumbJson):
Expand Down
Empty file.
43 changes: 43 additions & 0 deletions openformats/tests/formats/arb/files/1_el.arb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"@@locale": "en_US",
"@@x-template": "path/to/template.arb",
"@@context": "HomePage",

"MSG_OK": "el:Everything works fine.",

"title_bar": "el:My Cool Home",
"@title_bar": {
"type": "text",
"context": "HomePage",
"description": "Page title."
},

"total_files": "{ item_count, plural, one {el:You have {file_count} file.} other {el:You have {file_count} files.} }",
"special_chars": "{ cnt, plural, one {el:This is Sam's book.} other {el:These are Sam's books.} }",
"gold_coins": "{count, plural, zero {el:The chest is empty.} one {el:You have one gold coin.} other {el:You have {cnt} gold coins.}}",
"custom_plural_value": "{number, plural, one {el:1 New} two {el:# New}}",

"logo@src": "images/001.jpg",
"@logo@src": {
"context": "arb_editor",
"type": "image",
"description": "logo image, 128x128"
},

"font_style": "#title {font-family: Verdana, Geneva, sans-serif; font-style: oblique; font-size: 36px}",
"@font_style": {
"context": "arb_editor",
"type": "css",
"description": "font specific css"
},

"input_test1@placeholder": "el:localized placeholder text",
"input_test2@value": "el:localized input value",

"logo": "el:ARB",
"@logo": {
"type": "text",
"screen": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAkGBhQGBRUIBwgKFQkKDRYODRYMFhYfHhoWHRweHB8cHh4cJzIqIyUkHB4cITssLycpLCwsFSExPjAtNSgrLEABCQoKDQsNGQ4OGTUkHiQ1LDU1NS4sLCo1NTYpNTYpLCw1NS40NCw0LikuKSkpLCwpLCw0KTQpKSwsNCkpNCkpLP/AABEIADIAMgMBIgACEQEDEQH/xAAbAAACAwEBAQAAAAAAAAAAAAAABQIEBgcBA//EADQQAAEDAgIIAwUJAAAAAAAAAAEAAgMEEQUhBhITMUFRYXEUIjIjgbHB0QcWNEJTkaGi4f/EABgBAAMBAQAAAAAAAAAAAAAAAAABBAUC/8QAIBEAAgEEAgMBAAAAAAAAAAAAAAECAxExQQQhEiKhFP/aAAwDAQACEQMRAD8A7c30+4LN6WSubVwxxyPbYufdhIzyA3dyvtJjTqHFjHKWvpZbOjLbXFsiLjI2cDlvzVPSmobII6mJwLQ19rcxY2PLsmVUINVFddFj7xeGwNz5zeoiadU88sj9f9SfB8VqKOQvmqHSMkiHllPpflmCM7b7hIJ6uSplAkn9kDrFthY24Gwum9LXtqachoIeBmD8QeKWDS/PCEX0nf4OsG0hkmxjYVjmmOa4ZqgDVIBPcg9ei1Nlh8FDY8VbNM6zIgXn9rD+Sr2JaXPjkEVBTN13uDI9oTck7hYfVMhrUPKfotGrQq8DX+HbtpGbXVGvqjK9s7e9epEVjn+k8UmB4trAu8FUv2sd9weR5gOR49R2VeWsGI0oEbgJb2IPG+S6NV0TK+jNPVRNdE8WcHLn+O6DzUFUZMHa99M4ekWLmnkQSLjrv+KT6NCjyY+KUumtmbnqNhUGF5Ie02IKvUkxEJlYPLexKli2D1FDRNrcSpYtj6Ha4zHImxNhwGe9JjWSOpCwgiFxvEQ02yyIHPuk6mmiyNeMkOZMW1W2a4342TvQzAXYhiLcYqmkQwkmC/5iQRcdBc58Sk+iNBTmU1OkNXFZpAhidffzNt/AAZ9eS6w1oa2zQAAMrLq60S8nkNJwis7JoQhBmEW+kdgpWUW+kdgpIAyv2iR+I0dbSkfiayCP+4PyT+sw2PEaXYVdOx0Rys4fDkkOlz9pjNDSfq120PZgv81qBuSWzlN3Yjw/Q6mwup29NS+1YbtL3OdY8wHGwPVPEL1M7cnLLBCEIERb6R2CkhCAKlTA2Sdkj42F8TzsyQLt8h3HgrQQhAlk9QhCBghCEAf/2Q==",
"video": "http://www.youtube.com/user_interaction"
}
}
43 changes: 43 additions & 0 deletions openformats/tests/formats/arb/files/1_en.arb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"@@locale": "en_US",
"@@x-template": "path/to/template.arb",
"@@context": "HomePage",

"MSG_OK": "Everything works fine.",

"title_bar": "My Cool Home",
"@title_bar": {
"type": "text",
"context": "HomePage",
"description": "Page title."
},

"total_files": "{ item_count, plural, one {You have {file_count} file.} other {You have {file_count} files.} }",
"special_chars": "{ cnt, plural, one {This is Sam's book.} other {These are Sam's books.} }",
"gold_coins": "{count, plural, zero {The chest is empty.} one {You have one gold coin.} other {You have {cnt} gold coins.}}",
"custom_plural_value": "{number, plural, =1 {1 New} =2 {# New}}",

"logo@src": "images/001.jpg",
"@logo@src": {
"context": "arb_editor",
"type": "image",
"description": "logo image, 128x128"
},

"font_style": "#title {font-family: Verdana, Geneva, sans-serif; font-style: oblique; font-size: 36px}",
"@font_style": {
"context": "arb_editor",
"type": "css",
"description": "font specific css"
},

"input_test1@placeholder": "localized placeholder text",
"input_test2@value": "localized input value",

"logo": "ARB",
"@logo": {
"type": "text",
"screen": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAkGBhQGBRUIBwgKFQkKDRYODRYMFhYfHhoWHRweHB8cHh4cJzIqIyUkHB4cITssLycpLCwsFSExPjAtNSgrLEABCQoKDQsNGQ4OGTUkHiQ1LDU1NS4sLCo1NTYpNTYpLCw1NS40NCw0LikuKSkpLCwpLCw0KTQpKSwsNCkpNCkpLP/AABEIADIAMgMBIgACEQEDEQH/xAAbAAACAwEBAQAAAAAAAAAAAAAABQIEBgcBA//EADQQAAEDAgIIAwUJAAAAAAAAAAEAAgMEEQUhBhITMUFRYXEUIjIjgbHB0QcWNEJTkaGi4f/EABgBAAMBAQAAAAAAAAAAAAAAAAABBAUC/8QAIBEAAgEEAgMBAAAAAAAAAAAAAAECAxExQQQhEiKhFP/aAAwDAQACEQMRAD8A7c30+4LN6WSubVwxxyPbYufdhIzyA3dyvtJjTqHFjHKWvpZbOjLbXFsiLjI2cDlvzVPSmobII6mJwLQ19rcxY2PLsmVUINVFddFj7xeGwNz5zeoiadU88sj9f9SfB8VqKOQvmqHSMkiHllPpflmCM7b7hIJ6uSplAkn9kDrFthY24Gwum9LXtqachoIeBmD8QeKWDS/PCEX0nf4OsG0hkmxjYVjmmOa4ZqgDVIBPcg9ei1Nlh8FDY8VbNM6zIgXn9rD+Sr2JaXPjkEVBTN13uDI9oTck7hYfVMhrUPKfotGrQq8DX+HbtpGbXVGvqjK9s7e9epEVjn+k8UmB4trAu8FUv2sd9weR5gOR49R2VeWsGI0oEbgJb2IPG+S6NV0TK+jNPVRNdE8WcHLn+O6DzUFUZMHa99M4ekWLmnkQSLjrv+KT6NCjyY+KUumtmbnqNhUGF5Ie02IKvUkxEJlYPLexKli2D1FDRNrcSpYtj6Ha4zHImxNhwGe9JjWSOpCwgiFxvEQ02yyIHPuk6mmiyNeMkOZMW1W2a4342TvQzAXYhiLcYqmkQwkmC/5iQRcdBc58Sk+iNBTmU1OkNXFZpAhidffzNt/AAZ9eS6w1oa2zQAAMrLq60S8nkNJwis7JoQhBmEW+kdgpWUW+kdgpIAyv2iR+I0dbSkfiayCP+4PyT+sw2PEaXYVdOx0Rys4fDkkOlz9pjNDSfq120PZgv81qBuSWzlN3Yjw/Q6mwup29NS+1YbtL3OdY8wHGwPVPEL1M7cnLLBCEIERb6R2CkhCAKlTA2Sdkj42F8TzsyQLt8h3HgrQQhAlk9QhCBghCEAf/2Q==",
"video": "http://www.youtube.com/user_interaction"
}
}
Loading

0 comments on commit 385d505

Please sign in to comment.