Skip to content

Commit

Permalink
999999999_54872.py (#42): start to use skos:related for things withou…
Browse files Browse the repository at this point in the history
…t better explanation (like relation with places)
  • Loading branch information
fititnt committed May 18, 2022
1 parent bb8fdea commit 97db6d1
Show file tree
Hide file tree
Showing 2 changed files with 125 additions and 16 deletions.
17 changes: 10 additions & 7 deletions officinam/999999999/0/999999999_10263485.meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@ methodus:
# ex (+ ablative), https://en.wiktionary.org/wiki/ex#Latin
# locālī, n, s, dativus, https://en.wiktionary.org/wiki/localis#Latin
identitas_locali_ex_hxl_hashtag:
- '#item+conceptum+codicem'
- '#item+rem+i_qcc+is_zxxx+ix_v76vcnes'
- '#loc+facility+code+v_76_cnes'
- "#item+conceptum+codicem"
- "#item+rem+i_qcc+is_zxxx+ix_v76vcnes"
- "#loc+facility+code+v_76_cnes"

# https://www.wikidata.org/wiki/Property:P131
# Veja https://www.wikidata.org/wiki/Wikidata:List_of_properties/pt-br
Expand All @@ -111,10 +111,13 @@ methodus:
# @TODO mover isso para outro lugar (precisa ser generalizado)
__skos_mapping:
1:
hxltm: '#item+rem+i_qcc+is_zxxx+ix_wikip1585'
hxltm: "#item+rem+i_qcc+is_zxxx+ix_wikip1585"
# Brazilian municipality code (P1585)
predicate: '<http://www.wikidata.org/prop/P1585>'
predicate: "<http://www.wikidata.org/prop/P1585>"
2:
hxltm: '#meta+rem+i_qcc+is_zxxx+ix_wikip6204'
hxltm: "#meta+rem+i_qcc+is_zxxx+ix_wikip6204"
# CNPJ (P6204)
predicate: '<http://www.wikidata.org/prop/P6204>'
predicate: "<http://www.wikidata.org/prop/P6204>"
skos:related:
1:
hxltm: '#item+rem+i_qcc+is_zxxx+ix_wikip1585'
124 changes: 115 additions & 9 deletions officinam/999999999/0/999999999_54872.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,8 @@ class HXLTMAdRDFSimplicis:
# locālī, n, s, dativus, https://en.wiktionary.org/wiki/localis#Latin
# identitas_locali_ex_hxl_hashtag: str = '#item+conceptum+codicem'
identitas_locali_index: int = -1
_hxltm_linguae_index: list = []
_hxltm_linguae_info: dict = {}
_hxltm_meta_index: list = []
_hxltm_meta_info: dict = {}
_hxltm_unlabeled_index: list = []
Expand Down Expand Up @@ -440,6 +442,8 @@ def __init__(
self._post_init()

def _post_init(self):
# @TODO: remove the workarounds ("gambiarras") from this _post_init

if 'identitas_locali_ex_hxl_hashtag' in \
self.fons_configurationi['numerordinatio']:
_test = self.fons_configurationi['numerordinatio']['identitas_locali_ex_hxl_hashtag']
Expand All @@ -453,18 +457,47 @@ def _post_init(self):
if self.identitas_locali_index == -1:
raise ValueError("HXLTMAdRDFSimplicis [{0}] ?? <{1}>".format(
_test, self.caput))
for item in self.caput:

for _index, item in enumerate(self.caput):
attrs = item.replace('#item+rem', '')
bcp47_simplici = qhxl_hxlhashtag_2_bcp47(attrs)
# lingua = bcp47_langtag(bcp47_simplici, [
# # 'Language-Tag',
# 'Language-Tag_normalized',
# 'language'
# ], strictum=False)
# bcp47_langtag

if item not in self._hxltm_labeled:
_index = self.caput.index(item)
# _index = self.caput.index(item)
if item.startswith('#meta'):
self._hxltm_meta_index.append(_index)
self._hxltm_meta_info[_index] = {
'hxltm_hashtag': item
'hxltm_hashtag': item,
'bcp47': bcp47_simplici
}
continue
self._hxltm_unlabeled_index.append(_index)
self._hxltm_unlabeled_info[_index] = {
'hxltm_hashtag': item
'hxltm_hashtag': item,
'bcp47': bcp47_simplici
}

# Language tags only
if not item.startswith('#item+rem'):
continue
# attrs = item.replace('#item+rem', '')
# hxlattslinguae = qhxl_attr_2_bcp47(attrs)
# lingua = bcp47_langtag(hxlattslinguae, [
# # 'Language-Tag',
# 'Language-Tag_normalized',
# 'language'
# ], strictum=False)
if bcp47_simplici and not bcp47_simplici.startswith(('qcc', 'zxx')):
self._hxltm_linguae_index.append(_index)
self._hxltm_linguae_info[_index] = {
'hxltm_hashtag': item,
'bcp47': bcp47_simplici
}

def resultatum_ad_csv(self):
Expand Down Expand Up @@ -505,6 +538,7 @@ def resultatum_ad_turtle(self):
print('# fons_configurationi ' + str(self.fons_configurationi))
print('# _hxltm_unlabeled_info ' + str(self._hxltm_unlabeled_info))
print('# _hxltm_meta_info ' + str(self._hxltm_meta_info))
print('# _hxltm_linguae_info ' + str(self._hxltm_linguae_info))
print('')
print('# @TODO adicionar mais prefixos de '
'https://www.wikidata.org/wiki/EntitySchema:E49')
Expand All @@ -531,18 +565,43 @@ def resultatum_ad_turtle(self):
self.praefixo,
_codex_locali
))
print(' skos:prefLabel "{0}:{1}"@mul-Zyyy-x-n1603 .'.format(
# print(' skos:prefLabel "{0}:{1}"@mul-Zyyy-x-n1603 .'.format(
print(' skos:prefLabel "{0}:{1}"@mul-Zyyy-x-n1603 ;'.format(
self.praefixo,
_codex_locali
))
_skos_related = []
_skos_related_raw = []
for _index, item in enumerate(linea):
if item and _index in self._hxltm_unlabeled_index and \
self._hxltm_unlabeled_info[_index]['bcp47']:
_skos_related_raw.append('"{0}"@{1}'.format(
item.replace('"', '\\"'),
self._hxltm_unlabeled_info[_index]['bcp47']
))
# pass

# @TODO add other related
_skos_related = _skos_related_raw
# linguae = self._quod_linguae(res)
if len(_skos_related) > 0:
print(" skos:related\n {0} .".format(
" ,\n ".join(_skos_related_raw)
))
for _index, item in enumerate(linea):
if len(item) and _index in self._hxltm_unlabeled_index:
print(' # {0} [{1}]'.format(
self._hxltm_unlabeled_info[_index]['hxltm_hashtag'], item))
# if len(item) and _index in self._hxltm_unlabeled_index:
# print(' # {0} [{1}]'.format(
# self._hxltm_unlabeled_info[_index]['hxltm_hashtag'], item))
if len(item) and _index in self._hxltm_meta_index:
print(' ## {0} [{1}]'.format(
print(' # verbose: {0} [{1}]'.format(
self._hxltm_meta_info[_index]['hxltm_hashtag'], item))

# linguae = self._quod_linguae(res)
# if len(linguae) > 0:
# print(" skos:prefLabel\n {0} .".format(
# " ,\n ".join(linguae)
# ))

print('')

return Cli.EXIT_OK
Expand Down Expand Up @@ -753,6 +812,53 @@ def hxltm_carricato(
return caput, list(_reader)


def qhxl_hxlhashtag_2_bcp47(hxlhashtag: str) -> 'str | None':
    """Try to convert a full HXL hashtag into a simplified BCP47-like tag.

    The hashtag is split on '+' and three kinds of attributes are read:

    - ``i_<language>``: language subtag (lower-cased in the result)
    - ``is_<script>``: script subtag (capitalized in the result)
    - ``ix_<extension>``: zero or more private-use subtags, emitted sorted
      after ``-x-`` and joined with ``-``

    If ``i_`` or ``is_`` attributes are repeated, the last one wins.

    Args:
        hxlhashtag (str): HXL hashtag (or only its attribute part), for
            example ``'#item+rem+i_qcc+is_zxxx+ix_wikip1585'``.

    Returns:
        str | None: the tag, such as ``'qcc-Zxxx-x-wikip1585'``, or None
        when the input is empty or lacks a language or a script attribute.
    """
    if not hxlhashtag:
        return None

    # Cheap pre-check: both markers must appear somewhere in the hashtag.
    if 'i_' not in hxlhashtag or 'is_' not in hxlhashtag:
        return None

    _bcp_lang = ''
    _bcp_script = ''
    _bcp_extension = []
    for item in hxlhashtag.split('+'):
        # The three prefixes are mutually exclusive (they differ on the
        # second character). Only the leading prefix is stripped, so the
        # rest of the attribute value is preserved verbatim.
        if item.startswith('i_'):
            _bcp_lang = item[len('i_'):]
        elif item.startswith('is_'):
            _bcp_script = item[len('is_'):]
        elif item.startswith('ix_'):
            _bcp_extension.append(item[len('ix_'):])

    # The markers may have matched only in the middle of an attribute, or
    # with an empty value; report "not convertible" uniformly as None.
    if not _bcp_lang or not _bcp_script:
        return None

    bcp47_simplici = "{0}-{1}".format(
        _bcp_lang.lower(), _bcp_script.capitalize())
    if _bcp_extension:
        bcp47_simplici = "{0}-x-{1}".format(
            bcp47_simplici,
            '-'.join(sorted(_bcp_extension))
        )

    return bcp47_simplici


if __name__ == "__main__":

est_cli = Cli()
Expand Down

0 comments on commit 97db6d1

Please sign in to comment.