Skip to content

Commit

Permalink
999999999_521850.py (#43): hxl_hashtag_normalizatio() created
Browse files Browse the repository at this point in the history
  • Loading branch information
fititnt committed Jul 31, 2022
1 parent dcb5138 commit 0c17c6b
Showing 1 changed file with 104 additions and 3 deletions.
107 changes: 104 additions & 3 deletions officina/999999999/0/L999999999_0.py
Original file line number Diff line number Diff line change
Expand Up @@ -1367,7 +1367,8 @@ def bcp47_langtag(
{'variant': ['variant1'], 'extension': {'a': 'extend1'}, \
'privateuse': ['wadegile', 'private1']}
>>> bcp47_langtag(
... 'en-Latn-US-lojban-gaulish-a-12345678-ABCD-b-ABCDEFGH-x-a-b-c-12345678')
... 'en-Latn-US-lojban-gaulish-a-12345678-ABCD-b-ABCDEFGH-x-a-b-c-12345678',
... strictum=False)
{'Language-Tag': \
'en-Latn-US-lojban-gaulish-a-12345678-ABCD-b-ABCDEFGH-x-a-b-c-12345678', \
'Language-Tag_normalized': \
Expand All @@ -1376,8 +1377,11 @@ def bcp47_langtag(
'variant': ['lojban', 'gaulish'], \
'extension': {'a': '12345678-ABCD', 'b': 'ABCDEFGH'}, \
'privateuse': ['a', 'b', 'c', '12345678'], 'grandfathered': None, \
'_callbacks': {'hxl_attrs': '+i_en+is_latn+ix_12345678+ix_a+ix_b+ix_c', \
'hxl_minimal': None}, '_unknown': [], '_error': []}
'_callbacks': {'hxl_attrs': '+i_en+is_latn+ix_12345678', 'hxl_minimal': None}, \
'_unknown': [], '_error': ['private tag len = 1 [a]', \
'private tag len = 1 [b]', 'private tag len = 1 [c]', \
'private tag len = 1 [a]', 'private tag len = 1 [b]', \
'private tag len = 1 [c]']}
# BCP47: "Example: The language tag "en-a-aaa-b-ccc-bbb-x-xyz" is in
# canonical form, while "en-b-ccc-bbb-a-aaa-X-xyz" is well-formed (...)
Expand Down Expand Up @@ -4841,6 +4845,100 @@ def praeparatio(self):
return self


def hxl_hashtag_normalizatio(hashtag: str) -> str:
    """Normalize an HXL hashtag, sorting the attributes of HXLTM hashtags.

    The hashtag is stripped of surrounding whitespace and lowercased.
    Hashtags that do not use one of the HXLTM prefixes (``#item+rem``,
    ``#meta+rem``, ``#status+rem``) are returned at that point, with no
    reordering. For HXLTM hashtags, duplicated and empty attributes are
    dropped and the result is assembled in this order:

    1. the prefix;
    2. the language attribute (``i_`` followed by 3 characters);
    3. the script attribute (``is_`` followed by 4 characters);
    4. every ``ix_`` attribute with at least 2 characters after the
       underscore, sorted;
    5. all remaining attributes, sorted.

    If more than one language (or script) attribute is present, only the
    last one is kept.

    Args:
        hashtag (str): The HXL hashtag, including the leading ``#``.

    Raises:
        SyntaxError: If the hashtag is empty, does not start with ``#``, or
            is an HXLTM hashtag without a language or a script attribute.

    Returns:
        str: The normalized hashtag.

    >>> hxl_hashtag_normalizatio(' #date+stArT ')
    '#date+start'

    >>> hxl_hashtag_normalizatio(
    ...  '#item+rem+i_qcc+is_zxxx+ix_zza+rdf_t_xsd_datetime+ix_aaa+ix_aaa')
    '#item+rem+i_qcc+is_zxxx+ix_aaa+ix_zza+rdf_t_xsd_datetime'

    >>> hxl_hashtag_normalizatio(
    ...  '#item+rem+i_qcc+is_zxxx+ix_zza+rdf_t_xsd_datetime+ix_aaz')
    '#item+rem+i_qcc+is_zxxx+ix_aaz+ix_zza+rdf_t_xsd_datetime'

    >>> hxl_hashtag_normalizatio(
    ...  '#item+rem+i_qcc+is_zxxx+ix_zzzz+ix_zzz+rdf_t_xsd_int+ix_aaa')
    '#item+rem+i_qcc+is_zxxx+ix_aaa+ix_zzz+ix_zzzz+rdf_t_xsd_int'

    >>> hxl_hashtag_normalizatio('#item+remx+i_qcc+is_zxxx')
    '#item+remx+i_qcc+is_zxxx'
    """
    hxltm_prefixes = ('#item+rem', '#meta+rem', '#status+rem')

    if not hashtag or not hashtag.strip().startswith('#'):
        raise SyntaxError(hashtag)
    hashtag = hashtag.strip().lower()

    # A prefix only counts when it is followed by an attribute separator (or
    # is the whole hashtag); otherwise '#item+remx' would be mistaken for
    # '#item+rem' and its attributes would be split incorrectly.
    hxltm_hashtag = None
    for prefix in hxltm_prefixes:
        if hashtag == prefix or hashtag.startswith(prefix + '+'):
            hxltm_hashtag = prefix
            break

    if hxltm_hashtag is None:
        # Assume it is a generic HXL hashtag and return it as it is.
        # Similar to #item+conceptum+(...), which are simpler and require
        # no special sorting
        return hashtag

    # Slice the prefix off (instead of str.replace, which would also remove
    # later occurrences), skip empty attributes produced by a trailing or
    # doubled '+', and remove duplicates while keeping first-seen order.
    remainder = hashtag[len(hxltm_hashtag):]
    attrs = list(dict.fromkeys(
        attr for attr in remainder.split('+') if attr))

    attr_lang = None
    attr_script = None
    attrs_ix = []
    attrs_rest = []
    for attr in attrs:
        if attr.startswith('i_') and len(attr) == 5:
            attr_lang = attr
        elif attr.startswith('is_') and len(attr) == 7:
            attr_script = attr
        elif attr.startswith('ix_') and len(attr) >= 5:
            attrs_ix.append(attr)
        else:
            attrs_rest.append(attr)

    if attr_lang is None:
        raise SyntaxError(f'+i_qqq? <{hashtag}>')
    if attr_script is None:
        raise SyntaxError(f'+is_qqqq? <{hashtag}>')

    parts = [hxltm_hashtag, attr_lang, attr_script]
    parts.extend(sorted(attrs_ix))
    parts.extend(sorted(attrs_rest))
    return '+'.join(parts)


def hxl_hashtag_to_bcp47(
hashtag: str,
) -> str:
Expand Down Expand Up @@ -4946,6 +5044,8 @@ def hxl_hashtag_to_bcp47(
result['privateuse'] = privateuse

if len(rdf_parts) > 0:
# rdf_parts = sorted(rdf_parts)
rdf_parts.sort()
result['_callbacks']['rdf_parts'] = rdf_parts
# value_prefixes = None
for item in rdf_parts:
Expand Down Expand Up @@ -5117,6 +5217,7 @@ def hxl_hashtag_to_bcp47(
result['_unknown'].append('rdf_parts [{0}]'.format(item))
# pass
if len(_bcp47_g_parts) > 0:
_bcp47_g_parts.sort()
result['extension']['r']['rdf:Statement_raw'] = \
'r-' + '-'.join(_bcp47_g_parts)
# norm.append('r-' + '-'.join(_bcp47_g_parts))
Expand Down

0 comments on commit 0c17c6b

Please sign in to comment.