Merge pull request #338 from dsavinov-actionengine/support_arb

Adding support for ARB (Application Resource Bundle) (.arb) format
transifex · Jun 25, 2024 · 385d505 · 385d505
2 parents 80af7c5 + 7941dd7
commit 385d505
Show file tree

Hide file tree

Showing 8 changed files with 727 additions and 16 deletions.
diff --git a/bin/create_files.py b/bin/create_files.py
@@ -32,6 +32,7 @@ def get_handler(ext):
         'vtt': vtt.VttHandler(),
         'xml': android.AndroidHandler(),
         'json': json.JsonHandler(),
+        'arb': json.ArbHandler(),
         'po': po.PoHandler(),
         'md': github_markdown_v2.GithubMarkdownHandlerV2(),
     }[ext]

diff --git a/openformats/formats/json.py b/openformats/formats/json.py
@@ -74,7 +74,7 @@ def _extract(self, parsed, nest=None):
             for key, key_position, value, value_position in parsed:
                 key = self._escape_key(key)
                 if nest is not None:
-                    key = u"{}.{}".format(nest, key)
+                    key = f"{nest}.{key}"
 
                 # 'key' should be unique
                 if key in self.existing_keys:
@@ -130,9 +130,9 @@ def _extract(self, parsed, nest=None):
         elif parsed.type == list:
             for index, (item, item_position) in enumerate(parsed):
                 if nest is None:
-                    key = u"..{}..".format(index)
+                    key = f"..{index}.."
                 else:
-                    key = u"{}..{}..".format(nest, index)
+                    key = f"{nest}..{index}.."
                 if isinstance(item, (six.binary_type, six.text_type)):
                     if not item.strip():
                         continue
@@ -162,7 +162,7 @@ def _create_openstring(self, key, value, value_position):
         # e.g. a pluralized string.
         # If it cannot be parsed that way (returns None), parse it like
         # a regular string.
-        parser = ICUParser(allow_numeric_plural_values=False)
+        parser = ICUParser(allow_numeric_plural_values = False)
         icu_string = parser.parse(key, value)
         if icu_string:
             return self._create_pluralized_string(icu_string, value_position)
@@ -171,7 +171,8 @@ def _create_openstring(self, key, value, value_position):
             key, value, value_position
         )
 
-    def _create_pluralized_string(self, icu_string, value_position):
+    def _create_pluralized_string(self, icu_string, value_position,
+                                  context_value="", description_value=""):
         """Create a pluralized string based on the given information.
 
         Also updates the transcriber accordingly.
@@ -185,6 +186,8 @@ def _create_pluralized_string(self, icu_string, value_position):
             icu_string.strings_by_rule,
             pluralized=icu_string.pluralized,
             order=next(self._order),
+            context=context_value,
+            developer_comment=description_value,
         )
 
         current_pos = icu_string.current_position
@@ -196,15 +199,19 @@ def _create_pluralized_string(self, icu_string, value_position):
 
         return openstring
 
-    def _create_regular_string(self, key, value, value_position):
+    def _create_regular_string(self, key, value, value_position,
+                               context_value="", description_value=""):
         """Return a new simple OpenString based on the given key and value
         and update the transcriber accordingly.
 
         :param key: the string key
         :param value: the translation string
         :return: an OpenString or None
         """
-        openstring = OpenString(key, value, order=next(self._order))
+        openstring = OpenString(key, value, order=next(self._order),
+                                context=context_value,
+                                developer_comment=description_value,
+        )
         self.transcriber.copy_until(value_position)
         self.transcriber.add(openstring.template_replacement)
         self.transcriber.skip(len(value))
@@ -472,6 +479,156 @@ def unescape(string):
         return unescape(string)
 
 
+class ArbHandler(JsonHandler):
+    """Handler for ARB (Application Resource Bundle) files.
+
+    Root-level string values are extracted as translatable strings, while
+    nested entries under "@"-prefixed keys are treated as metadata: a
+    "type" other than "text" excludes the matching string from extraction,
+    and "context"/"description" are attached to the matching string as its
+    context and developer comment respectively.
+    """
+
+    name = "ARB"
+    extension = "arb"
+    keep_sections = True
+
+    def parse(self, content, **kwargs):
+        """Return the `(template, stringset)` pair extracted from `content`.
+
+        :param content: the ARB file content
+        :raises ParseError: if the content cannot be parsed, if its root is
+            not a JSON object, if a key is duplicated or if no strings
+            could be extracted
+        """
+        # Validate that content is JSON
+        self.validate_content(content)
+
+        self.transcriber = Transcriber(content)
+        source = self.transcriber.source
+        self.stringset = []
+        self.existing_keys = set()
+        self.metadata = {}
+
+        try:
+            parsed = DumbJson(source)
+        except ValueError as e:
+            raise ParseError(six.text_type(e))
+        if parsed.type != dict:
+            raise ParseError("Invalid JSON")
+        self._order = count()
+        # Two passes: the first one collects keys and "@" metadata so that
+        # the second one can consult them regardless of their position in
+        # the file
+        self._find_keys(parsed)
+        self._extract(parsed)
+
+        if not self.stringset:
+            raise ParseError('No strings could be extracted')
+
+        self.transcriber.copy_until(len(source))
+
+        return self.transcriber.get_destination(), self.stringset
+
+    def _find_keys(self, parsed, nest=None):
+        """Recursively record the keys and "@" metadata found in `parsed`.
+
+        Root-level keys are added to `self.existing_keys` in order to detect
+        duplicates. For nested keys that start with "@", keys ending in
+        ".type" whose value is not "text" are added there as well, and the
+        values of keys ending in ".context" or ".description" are stored in
+        `self.metadata`.
+
+        :param parsed: the DumbJson node to walk
+        :param nest: the dotted key of the parent node, None for the root
+        :raises ParseError: if a key is already in `self.existing_keys`
+        """
+        for key, key_position, value, _ in parsed:
+            key = self._escape_key(key)
+            if nest is not None:
+                key = f"{nest}.{key}"
+
+            # 'key' should be unique
+            if key in self.existing_keys:
+                # Need this for line number
+                self.transcriber.copy_until(key_position)
+                raise ParseError(f"Duplicate string key ('{key}') in line "
+                                 f"{self.transcriber.line_number}")
+            if nest is None:
+                # Store all root-level keys in order to detect duplication
+                self.existing_keys.add(key)
+            elif key.startswith("@"):
+                if key.endswith(".type") and value != "text":
+                    self.existing_keys.add(key)
+                elif key.endswith((".context", ".description")):
+                    self.metadata[key] = value
+
+            if isinstance(value, DumbJson):
+                self._find_keys(value, key)
+
+    def _extract(self, parsed):
+        """Append an OpenString to `self.stringset` for each root string.
+
+        Keys starting with "@", non-string values (bools, nulls, numbers,
+        nested structures), blank strings and strings that `_find_keys`
+        flagged with a non-"text" type are skipped.
+
+        :param parsed: the root DumbJson node
+        """
+        for key, _, value, value_position in parsed:
+            key = self._escape_key(key)
+
+            if key.startswith("@") or not isinstance(value, six.text_type):
+                continue
+            if not value.strip() or f"@{key}.type" in self.existing_keys:
+                continue
+
+            openstring = self._create_openstring(
+                key, value, value_position,
+                self.metadata.get(f"@{key}.context", ""),
+                self.metadata.get(f"@{key}.description", ""),
+            )
+            if openstring:
+                self.stringset.append(openstring)
+
+    def _create_openstring(self, key, value, value_position,
+                           context_value="", description_value=""):
+        """Create a pluralized or a regular OpenString for the given value.
+
+        The value is first given to an ICUParser created with
+        `allow_numeric_plural_values=True`; if that returns nothing, the
+        value is handled as a regular string.
+
+        :param key: the string key
+        :param value: the translation string
+        :param value_position: the position of the value in the source
+        :param context_value: the context to attach to the string
+        :param description_value: the developer comment to attach
+        :return: whatever the `_create_pluralized_string` or
+            `_create_regular_string` call returns
+        """
+        parser = ICUParser(allow_numeric_plural_values=True)
+        icu_string = parser.parse(key, value)
+        if icu_string:
+            return self._create_pluralized_string(icu_string, value_position,
+                                                  context_value,
+                                                  description_value)
+
+        return self._create_regular_string(
+            key, value, value_position,
+            context_value, description_value
+        )
+
+    def compile(self, template, stringset, language_info=None, **kwargs):
+        """Compile `template` against `stringset` and return the result.
+
+        :param template: the template produced by `parse`
+        :param stringset: an iterable of OpenStrings holding translations
+        :param language_info: optional mapping; when given, its "code" item
+            replaces the value of the "@@locale" entry of the template
+        :param kwargs: `keep_sections` (default True) is stored on the
+            handler and read by `_copy_until_and_remove_section`
+        """
+        # Let's play on the template first, we need it to not include the
+        # hashes that aren't in the stringset. For that we will create a new
+        # stringset which will have the hashes themselves as strings and
+        # compile against that. The compilation process will remove any
+        # string sections that are absent from the stringset. Next we will
+        # call `_clean_empties` from the template to clear out any
+        # `...,  ,...` or `...{ ,...` sequences left. The result will be used
+        # as the actual template for the compilation process
+        self.keep_sections = kwargs.get('keep_sections', True)
+
+        stringset = list(stringset)
+
+        fake_stringset = [
+            OpenString(openstring.key,
+                       openstring.template_replacement,
+                       order=openstring.order,
+                       pluralized=openstring.pluralized)
+            for openstring in stringset
+        ]
+        new_template = self._replace_translations(
+            template, fake_stringset, False
+        )
+        new_template = self._clean_empties(new_template)
+
+        if language_info is not None:
+            # Locale values may contain digits and hyphens too (for example
+            # "es_419" or "zh-Hans"), so accept those in the current value
+            match = re.search(r'("@@locale"\s*:\s*")([A-Za-z0-9_-]*)"',
+                              new_template)
+            if match:
+                new_template = u"{}{}{}".format(
+                    new_template[:match.start(2)],
+                    language_info["code"],
+                    new_template[match.end(2):]
+                )
+
+        return self._replace_translations(new_template, stringset, True)
+
+    def _copy_until_and_remove_section(self, pos):
+        """
+        Copy characters to the transcriber until the given position,
+        then end the current section.
+
+        The section is only removed when `keep_sections` is falsy.
+        """
+        self.transcriber.copy_until(pos)
+        self.transcriber.mark_section_end()
+        # Unlike the JSON format, do not remove the remaining section of the
+        # template by default
+        if not self.keep_sections:  # needed for a test
+            self.transcriber.remove_section()
+
+
 class StructuredJsonHandler(JsonHandler):
     """Handler that preserves certain keys for internal usage, while
     keeping the flexibility and functionality of the original JsonHandler. It
@@ -534,16 +691,15 @@ def _compile_value(self, value, template_value, value_position, skip=False):
                 self.transcriber.add(u"null")
             else:
                 if template_value is None:
-                    self.transcriber.add(u"\"{}\"".format(value))
+                    self.transcriber.add(f"\"{value}\"")
                 else:
-                    self.transcriber.add(u"{}".format(value))
+                    self.transcriber.add(f"{value}")
         else:
             self.transcriber.add(u"null")
 
-        self.transcriber.skip(len(u"{}".format(template_value)))
+        self.transcriber.skip(len(f"{template_value}"))
         self.transcriber.copy_until(value_position +
-                                    len(u"{}".format(template_value)) +
-                                    1)
+                                    len(f"{template_value}") + 1)
 
     def _compile_recursively(self, current_part):
         if isinstance(current_part, DumbJson):
@@ -623,7 +779,7 @@ def _compile_recursively(self, current_part):
                                                 value_position)
                         elif not isinstance(value, DumbJson):
                             self.transcriber.copy_until(
-                                value_position + len(u"{}".format(value)) + 1
+                                value_position + len(f"{value}") + 1
                             )
 
                     extra_elements = []
@@ -765,7 +921,7 @@ def _parse_key(self, key):
         """
         # We need to parse only STRING_KEY keys, otherwise we should
         # early return
-        if not key.endswith(".{}".format(self.STRING_KEY)):
+        if not key.endswith(f".{self.STRING_KEY}"):
             return None
         # Remove the STRING_KEY part of the key as it is not needed. Add +1
         # when calculating the length of the STRING_KEY, for the "." character
@@ -1004,8 +1160,7 @@ def parse(self, content, **kwargs):
              outer_key_position, outer_value, outer_value_position) in parsed:
             if outer_key in existing_keys:
                 transcriber.copy_until(outer_key_position)
-                raise ParseError(u"Key '{}' appears multiple times (line {})".
-                                 format(outer_key, transcriber.line_number))
+                raise ParseError(f"Key '{outer_key}' appears multiple times (line {transcriber.line_number})")
             existing_keys.add(outer_key)
 
             if not isinstance(outer_value, DumbJson):

diff --git a/openformats/tests/formats/arb/__init__.py b/openformats/tests/formats/arb/__init__.py
diff --git a/openformats/tests/formats/arb/files/1_el.arb b/openformats/tests/formats/arb/files/1_el.arb
@@ -0,0 +1,43 @@
+{
+  "@@locale": "en_US",
+  "@@x-template": "path/to/template.arb",
+  "@@context": "HomePage",
+
+  "MSG_OK": "el:Everything works fine.",
+
+  "title_bar": "el:My Cool Home",
+  "@title_bar": {
+    "type": "text",
+    "context": "HomePage",
+    "description": "Page title."
+  },
+
+  "total_files": "{ item_count, plural, one {el:You have {file_count} file.} other {el:You have {file_count} files.} }",
+  "special_chars": "{ cnt, plural, one {el:This is Sam's book.} other {el:These are Sam's books.} }",
+  "gold_coins": "{count, plural, zero {el:The chest is empty.} one {el:You have one gold coin.} other {el:You have {cnt} gold coins.}}",
+  "custom_plural_value": "{number, plural, one {el:1 New} two {el:# New}}",
+
+  "logo@src": "images/001.jpg",
+  "@logo@src": {
+      "context": "arb_editor",
+      "type": "image",
+      "description": "logo image, 128x128"
+  },
+
+  "font_style": "#title {font-family: Verdana, Geneva, sans-serif; font-style: oblique; font-size: 36px}",
+  "@font_style": {
+      "context": "arb_editor",
+      "type": "css",
+      "description": "font specific css"
+  },
+
+  "input_test1@placeholder": "el:localized placeholder text",
+  "input_test2@value": "el:localized input value",
+
+  "logo": "el:ARB",
+  "@logo": {
+    "type": "text",
+    "screen": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAkGBhQGBRUIBwgKFQkKDRYODRYMFhYfHhoWHRweHB8cHh4cJzIqIyUkHB4cITssLycpLCwsFSExPjAtNSgrLEABCQoKDQsNGQ4OGTUkHiQ1LDU1NS4sLCo1NTYpNTYpLCw1NS40NCw0LikuKSkpLCwpLCw0KTQpKSwsNCkpNCkpLP/AABEIADIAMgMBIgACEQEDEQH/xAAbAAACAwEBAQAAAAAAAAAAAAAABQIEBgcBA//EADQQAAEDAgIIAwUJAAAAAAAAAAEAAgMEEQUhBhITMUFRYXEUIjIjgbHB0QcWNEJTkaGi4f/EABgBAAMBAQAAAAAAAAAAAAAAAAABBAUC/8QAIBEAAgEEAgMBAAAAAAAAAAAAAAECAxExQQQhEiKhFP/aAAwDAQACEQMRAD8A7c30+4LN6WSubVwxxyPbYufdhIzyA3dyvtJjTqHFjHKWvpZbOjLbXFsiLjI2cDlvzVPSmobII6mJwLQ19rcxY2PLsmVUINVFddFj7xeGwNz5zeoiadU88sj9f9SfB8VqKOQvmqHSMkiHllPpflmCM7b7hIJ6uSplAkn9kDrFthY24Gwum9LXtqachoIeBmD8QeKWDS/PCEX0nf4OsG0hkmxjYVjmmOa4ZqgDVIBPcg9ei1Nlh8FDY8VbNM6zIgXn9rD+Sr2JaXPjkEVBTN13uDI9oTck7hYfVMhrUPKfotGrQq8DX+HbtpGbXVGvqjK9s7e9epEVjn+k8UmB4trAu8FUv2sd9weR5gOR49R2VeWsGI0oEbgJb2IPG+S6NV0TK+jNPVRNdE8WcHLn+O6DzUFUZMHa99M4ekWLmnkQSLjrv+KT6NCjyY+KUumtmbnqNhUGF5Ie02IKvUkxEJlYPLexKli2D1FDRNrcSpYtj6Ha4zHImxNhwGe9JjWSOpCwgiFxvEQ02yyIHPuk6mmiyNeMkOZMW1W2a4342TvQzAXYhiLcYqmkQwkmC/5iQRcdBc58Sk+iNBTmU1OkNXFZpAhidffzNt/AAZ9eS6w1oa2zQAAMrLq60S8nkNJwis7JoQhBmEW+kdgpWUW+kdgpIAyv2iR+I0dbSkfiayCP+4PyT+sw2PEaXYVdOx0Rys4fDkkOlz9pjNDSfq120PZgv81qBuSWzlN3Yjw/Q6mwup29NS+1YbtL3OdY8wHGwPVPEL1M7cnLLBCEIERb6R2CkhCAKlTA2Sdkj42F8TzsyQLt8h3HgrQQhAlk9QhCBghCEAf/2Q==",
+    "video": "http://www.youtube.com/user_interaction"
+  }
+}
diff --git a/openformats/tests/formats/arb/files/1_en.arb b/openformats/tests/formats/arb/files/1_en.arb
@@ -0,0 +1,43 @@
+{
+  "@@locale": "en_US",
+  "@@x-template": "path/to/template.arb",
+  "@@context": "HomePage",
+
+  "MSG_OK": "Everything works fine.",
+
+  "title_bar": "My Cool Home",
+  "@title_bar": {
+    "type": "text",
+    "context": "HomePage",
+    "description": "Page title."
+  },
+
+  "total_files": "{ item_count, plural, one {You have {file_count} file.} other {You have {file_count} files.} }",
+  "special_chars": "{ cnt, plural, one {This is Sam's book.} other {These are Sam's books.} }",
+  "gold_coins": "{count, plural, zero {The chest is empty.} one {You have one gold coin.} other {You have {cnt} gold coins.}}",
+  "custom_plural_value": "{number, plural, =1 {1 New} =2 {# New}}",
+
+  "logo@src": "images/001.jpg",
+  "@logo@src": {
+      "context": "arb_editor",
+      "type": "image",
+      "description": "logo image, 128x128"
+  },
+
+  "font_style": "#title {font-family: Verdana, Geneva, sans-serif; font-style: oblique; font-size: 36px}",
+  "@font_style": {
+      "context": "arb_editor",
+      "type": "css",
+      "description": "font specific css"
+  },
+
+  "input_test1@placeholder": "localized placeholder text",
+  "input_test2@value": "localized input value",
+
+  "logo": "ARB",
+  "@logo": {
+    "type": "text",
+    "screen": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAkGBhQGBRUIBwgKFQkKDRYODRYMFhYfHhoWHRweHB8cHh4cJzIqIyUkHB4cITssLycpLCwsFSExPjAtNSgrLEABCQoKDQsNGQ4OGTUkHiQ1LDU1NS4sLCo1NTYpNTYpLCw1NS40NCw0LikuKSkpLCwpLCw0KTQpKSwsNCkpNCkpLP/AABEIADIAMgMBIgACEQEDEQH/xAAbAAACAwEBAQAAAAAAAAAAAAAABQIEBgcBA//EADQQAAEDAgIIAwUJAAAAAAAAAAEAAgMEEQUhBhITMUFRYXEUIjIjgbHB0QcWNEJTkaGi4f/EABgBAAMBAQAAAAAAAAAAAAAAAAABBAUC/8QAIBEAAgEEAgMBAAAAAAAAAAAAAAECAxExQQQhEiKhFP/aAAwDAQACEQMRAD8A7c30+4LN6WSubVwxxyPbYufdhIzyA3dyvtJjTqHFjHKWvpZbOjLbXFsiLjI2cDlvzVPSmobII6mJwLQ19rcxY2PLsmVUINVFddFj7xeGwNz5zeoiadU88sj9f9SfB8VqKOQvmqHSMkiHllPpflmCM7b7hIJ6uSplAkn9kDrFthY24Gwum9LXtqachoIeBmD8QeKWDS/PCEX0nf4OsG0hkmxjYVjmmOa4ZqgDVIBPcg9ei1Nlh8FDY8VbNM6zIgXn9rD+Sr2JaXPjkEVBTN13uDI9oTck7hYfVMhrUPKfotGrQq8DX+HbtpGbXVGvqjK9s7e9epEVjn+k8UmB4trAu8FUv2sd9weR5gOR49R2VeWsGI0oEbgJb2IPG+S6NV0TK+jNPVRNdE8WcHLn+O6DzUFUZMHa99M4ekWLmnkQSLjrv+KT6NCjyY+KUumtmbnqNhUGF5Ie02IKvUkxEJlYPLexKli2D1FDRNrcSpYtj6Ha4zHImxNhwGe9JjWSOpCwgiFxvEQ02yyIHPuk6mmiyNeMkOZMW1W2a4342TvQzAXYhiLcYqmkQwkmC/5iQRcdBc58Sk+iNBTmU1OkNXFZpAhidffzNt/AAZ9eS6w1oa2zQAAMrLq60S8nkNJwis7JoQhBmEW+kdgpWUW+kdgpIAyv2iR+I0dbSkfiayCP+4PyT+sw2PEaXYVdOx0Rys4fDkkOlz9pjNDSfq120PZgv81qBuSWzlN3Yjw/Q6mwup29NS+1YbtL3OdY8wHGwPVPEL1M7cnLLBCEIERb6R2CkhCAKlTA2Sdkj42F8TzsyQLt8h3HgrQQhAlk9QhCBghCEAf/2Q==",
+    "video": "http://www.youtube.com/user_interaction"
+  }
+}