Skip to content

Commit

Permalink
999999999_54872.py (#2): numerordinatio_data__hxltm_to_bcp47()
Browse files Browse the repository at this point in the history
  • Loading branch information
fititnt committed Jun 29, 2022
1 parent 1430a33 commit 28045bc
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 73 deletions.
75 changes: 75 additions & 0 deletions officina/999999999/0/999999999_54872.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ def make_args(self, hxl_output=True):
'_temp_hxl_meta_in_json',
'_temp_header_hxl_to_bcp47',
'_temp_header_bcp47_to_hxl',
'_temp_data_hxl_to_bcp47',
'_temp_bcp47_to_bcp47_shortnames',
'_temp_no1_to_no1_shortnames',
],
Expand Down Expand Up @@ -503,12 +504,44 @@ def execute_cli(self, pyargs, stdin=STDIN, stdout=sys.stdout,
rdf_ontologia_ordinibus=pyargs.rdf_ontologia_ordinibus,
est_meta=True)

# print(' aa', meta['caput_asa'])
# print(' bb', meta['caput_asa']['caput_originali'])
# print(' aa', meta['caput_asa']['caput_ad_columnae_i'])

numerordinatio_data__sortnames(
meta['caput_asa'], _infile, est_bcp47=est_bcp47,
punctum_separato=fontem_separato)

return self.EXIT_OK

# _temp_data_hxl_to_bcp47
# Simplistic conversion of header
if pyargs.objectivum_formato == '_temp_data_hxl_to_bcp47':
if _stdin:
raise NotImplementedError('{0} not with stdin'.format(
pyargs.objectivum_formato))
# print('oi')

numerordinatio_data__hxltm_to_bcp47(
fontem=_infile, punctum_separato=fontem_separato
)

# caput, data = hxltm_carricato_brevibus(
# _infile, _stdin, punctum_separato=fontem_separato)

# caput_novo = []
# for _item in caput:
# # print('hxl item > ', _item)
# _hxl = HXLHashtagSimplici(_item).praeparatio()
# _item_bcp47 = _hxl.quod_bcp47(strictum=False)
# # print('_item_bcp47 > ', _item_bcp47)
# caput_novo.append(_item_bcp47)
# caput = caput_novo

# print('@TODO')

return self.EXIT_OK

# @TODO maybe refactor this temporary part
# if pyargs.objectivum_formato == '_temp_bcp47_meta_in_json':
if pyargs.objectivum_formato in [
Expand Down Expand Up @@ -866,6 +899,44 @@ def actio(self):
# print('failed')


def numerordinatio_data__hxltm_to_bcp47(
    fontem: str, punctum_separato: str = ","
):
    """Print a CSV file to stdout with its header converted to BCP47.

    The header is loaded with hxltm_carricato_brevibus() and each header
    item is converted via
    HXLHashtagSimplici(item).praeparatio().quod_bcp47(strictum=False).
    The data rows are copied from the file unchanged.

    Args:
        fontem: path of the input file; it is always opened from disk
            (est_stdin=False is passed to the header loader).
        punctum_separato: field delimiter used both to read the input
            and to write the output.

    Returns:
        None. The converted table is written to sys.stdout.
    """
    # Only the header is used from this call; the data rows are streamed
    # straight from the file below.
    caput, _ = hxltm_carricato_brevibus(
        fontem, est_stdin=False, punctum_separato=punctum_separato)

    caput_novo = [
        HXLHashtagSimplici(_item).praeparatio().quod_bcp47(strictum=False)
        for _item in caput
    ]

    # newline='' lets the csv module do its own newline handling, so quoted
    # fields containing line breaks are read correctly.
    with open(fontem, 'r', newline='') as _fons:
        _writer = csv.writer(sys.stdout, delimiter=punctum_separato)
        _csv_reader = csv.reader(_fons, delimiter=punctum_separato)

        # Discard the original header; the converted one replaces it.
        next(_csv_reader)
        _writer.writerow(caput_novo)

        # Remaining rows are passed through untouched.
        _writer.writerows(_csv_reader)


def numerordinatio_data__sortnames(
caput_asa: dict, fontem: str,
est_bcp47: bool = True, punctum_separato: str = ","
Expand All @@ -889,6 +960,8 @@ def numerordinatio_data__sortnames(
if not est_bcp47:
caput_novo = numerordinatio_caput_bcp47_to_hxlhashtag(caput_novo)

# print(' oi', caput_novo)

with open(fontem, 'r') as _fons:
_writer = csv.writer(sys.stdout, delimiter=punctum_separato)
_csv_reader = csv.reader(_fons, delimiter=punctum_separato)
Expand Down Expand Up @@ -919,6 +992,8 @@ def numerordinatio_caput_bcp47_to_hxlhashtag(
# item_meta = bcp47_langtag(item,strictum=False)
item_meta = bcp47_langtag(item)

# print(' 333', item_meta['_callbacks']['hxl_attrs'])

if len(item_meta['_error']) == 0 and \
item_meta['Language-Tag_normalized']:
caput_novo.append('{0}{1}'.format(
Expand Down
51 changes: 34 additions & 17 deletions officina/999999999/0/L999999999_0.py
Original file line number Diff line number Diff line change
Expand Up @@ -1299,6 +1299,8 @@ def bcp47_langtag_callback_hxl(
str: return HXL attributes (without HXL hashtag)
"""

# raise ValueError(langtag_meta)

resultatum = []
# resultatum.append('+todo')
resultatum.append('+i_{0}'.format(langtag_meta['language'].lower()))
Expand All @@ -1308,7 +1310,12 @@ def bcp47_langtag_callback_hxl(

if langtag_meta['privateuse'] and len(langtag_meta['privateuse']) > 0:
for item in langtag_meta['privateuse']:
resultatum.append('+ix_{0}'.format(item.lower()))
# print(' 444', item)
if len(item) > 1:
resultatum.append('+ix_{0}'.format(item.lower()))
else:
langtag_meta['_error'].append(
'private tag len = 1 [{0}]'.format(item))

if langtag_meta['extension'] and 'r' in langtag_meta['extension']:
_r = langtag_meta['extension']['r']
Expand Down Expand Up @@ -1422,6 +1429,8 @@ def bcp47_langtag_callback_hxl(

resultatum = sorted(resultatum)

# raise ValueError(resultatum, langtag_meta)

return ''.join(resultatum)


Expand All @@ -1443,6 +1452,8 @@ def bcp47_langtag_callback_hxl_minimal(
extra_parts = []
parts = res.replace('+i_qcc+is_zxxx', '').split('+')

parts = list(filter(None, parts))

# raise ValueError(res)

# We only try to compact interlingual concepts, not linguistic
Expand All @@ -1457,22 +1468,25 @@ def bcp47_langtag_callback_hxl_minimal(
# This may either signal a error OR a HXL tag that subject is implicit
# u2203
if res.find('+rdf_s_u2200_s') == -1 and res.find('+rdf_s_u2203_s') == -1:
# ... however, if it does have a keys -x-LLL (+ix_LLL), let's attempt
# to assume they are unique engouth and sort it.
# ... except if is ix_error
if res.find('+ix_') > -1 and res.find('+ix_error') == -1:
for item in parts:
if item.startswith('ix_'):
minimal_parts.append(item)
else:
extra_parts.append(item)
if len(extra_parts) == 0:
return [res, None]

minimal = '+i_qcc+is_zxxx+' + '+'.join(minimal_parts)
extra = '+'.join(extra_parts)

return [minimal, extra]
# The following block is disabled for now: there are some cases where
# valid +ix_ prefixes exist, but in large tables they would conflict.

# # ... however, if it does have keys -x-LLL (+ix_LLL), let's attempt
# # to assume they are unique enough and sort it.
# # ... except if is ix_error
# if res.find('+ix_') > -1 and res.find('+ix_error') == -1:
# for item in parts:
# if item.startswith('ix_'):
# minimal_parts.append(item)
# else:
# extra_parts.append(item)
# if len(extra_parts) == 0:
# return [res, None]

# minimal = '+i_qcc+is_zxxx+' + '+'.join(minimal_parts)
# extra = '+'.join(extra_parts)

# return [minimal, extra]

# ... and do this also for data types such as +rdf_t_xsd_datetime
# even if does not have ix_
Expand Down Expand Up @@ -4678,6 +4692,8 @@ def hxl_hashtag_to_bcp47(
result['_callbacks']['hxl_minimal'] = bcp47_langtag_callback_hxl_minimal(
result, False)

# print(result['_callbacks']['hxl_minimal'])

# print(result['Language-Tag_normalized'] , result['_error'])
# print('[[{0}]]'.format(result['Language-Tag_normalized']))

Expand Down Expand Up @@ -6510,6 +6526,7 @@ def qhxl_attr_2_bcp47(hxlatt: str) -> str:
resultatum = tempus1[0] + '-' + tempus1[1].capitalize()
# @TODO: test better cases with +ix_
resultatum = resultatum.replace('+ix_', '-x-')
# raise ValueError

return resultatum

Expand Down
67 changes: 11 additions & 56 deletions officina/999999999/1603_45_16.lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,7 @@ bootstrap_1603_45_16__item_bcp47() {
cod_ab_level_max="${5}"
est_temporarium_fontem="${6:-"1"}"
est_temporarium_objectivum="${7:-"0"}"
# shellcheck disable=SC2034
rdf_ontologia_ordinibus="${8:-"5"}"

if [ "$est_temporarium_fontem" -eq "1" ]; then
Expand Down Expand Up @@ -462,83 +463,37 @@ bootstrap_1603_45_16__item_bcp47() {
"${objectivum_archivum_no1}" >"${opus_temporibus_temporarium}"

# Temporary fix: remove some generated tags with error: +ix_error
# Somewhat temporary: remove non-merget alts: +ix_alt0
# Somewhat temporary: remove non-merged alts: +ix_alt1|+ix_alt2|+ix_alt3
# Non-temporary: remove implicit tags: +ix_hxlattrs
hxlcut \
--exclude='#*+ix_error,#*+ix_hxlattrs' \
--exclude='#*+ix_error,#*+ix_hxlattrs,#*+ix_alt1,#*+ix_alt2,#*+ix_alt3' \
"${opus_temporibus_temporarium}" >"${opus_temporibus_temporarium_2}"

# Delete first line ,,,,,
sed -i '1d' "${opus_temporibus_temporarium_2}"

# "${ROOTDIR}/999999999/0/999999999_54872.py" \
# --objectivum-formato=_temp_no1_to_no1_shortnames \
# --numerordinatio-cum-antecessoribus \
# --rdf-sine-spatia-nominalibus=skos,devnull \
# --rdf-ontologia-ordinibus="${rdf_ontologia_ordinibus}" \
# --rdf-trivio="${rdf_trivio}" \
# <"${objectivum_archivum_no1}" >"${opus_temporibus_temporarium}"
frictionless validate "${opus_temporibus_temporarium_2}"

set -x
## Computational-like RDF serialization, "OWL version" --------------------
"${ROOTDIR}/999999999/0/999999999_54872.py" \
--objectivum-formato=_temp_data_hxl_to_bcp47 \
"${opus_temporibus_temporarium_2}" >"${opus_temporibus_temporarium}"

# @TODO fix generation of invalid format if
# --rdf-sine-spatia-nominalibus=skos,devnull is enabled
frictionless validate "${opus_temporibus_temporarium}"

# "${ROOTDIR}/999999999/0/999999999_54872.py" \
# --objectivum-formato=_temp_no1 \
# --objectivum-formato=_temp_no1_to_no1_shortnames \
# --numerordinatio-cum-antecessoribus \
# --rdf-sine-spatia-nominalibus=skos,devnull \
# --rdf-ontologia-ordinibus="${rdf_ontologia_ordinibus}" \
# --rdf-trivio="${rdf_trivio}" \
# <"${objectivum_archivum_no1}" >"${opus_temporibus_temporarium}"

# "${ROOTDIR}/999999999/0/999999999_54872.py" \
# --objectivum-formato=_temp_no1 \
# --numerordinatio-cum-antecessoribus \
# --rdf-sine-spatia-nominalibus=devnull \
# --rdf-ontologia-ordinibus="${rdf_ontologia_ordinibus}" \
# --rdf-trivio="${rdf_trivio}" \
# <"${objectivum_archivum_no1}" >"${opus_temporibus_temporarium}"

# rapper --quiet --input=turtle --output=turtle \
# "${opus_temporibus_temporarium}" \
# >"${objectivum_archivum_no1_owl_ttl}"

# riot --validate "${objectivum_archivum_no1_owl_ttl}"

# ## Linguistic-like RDF serialization, "SKOS version" ----------------------
# # @TODO fix invalid generation if disabling OWL with
# # --rdf-sine-spatia-nominalibus=owl

# # "${ROOTDIR}/999999999/0/999999999_54872.py" \
# # --objectivum-formato=_temp_no1 \
# # --numerordinatio-cum-antecessoribus \
# # --rdf-sine-spatia-nominalibus=owl,obo,p,geo,devnull \
# # --rdf-ontologia-ordinibus="${rdf_ontologia_ordinibus}" \
# # --rdf-trivio="${rdf_trivio}" \
# # <"${objectivum_archivum_no1}" >"${opus_temporibus_temporarium_2}"

# "${ROOTDIR}/999999999/0/999999999_54872.py" \
# --objectivum-formato=_temp_no1 \
# --numerordinatio-cum-antecessoribus \
# --rdf-sine-spatia-nominalibus=obo,p,geo,devnull \
# --rdf-ontologia-ordinibus="${rdf_ontologia_ordinibus}" \
# --rdf-trivio="${rdf_trivio}" \
# <"${objectivum_archivum_no1}" >"${opus_temporibus_temporarium_2}"

# rapper --quiet --input=turtle --output=turtle \
# "${opus_temporibus_temporarium_2}" \
# >"${objectivum_archivum_no1_skos_ttl}"

# riot --validate "${objectivum_archivum_no1_skos_ttl}"
set +x

# echo "OWL TTL: [${objectivum_archivum_no1_owl_ttl}]"
# echo "SKOS TTL: [${objectivum_archivum_no1_skos_ttl}]"
file_update_if_necessary csv "$opus_temporibus_temporarium" "$objectivum_archivum_bcp47"

# rm "$opus_temporibus_temporarium"
# rm "$opus_temporibus_temporarium_2"
rm "$opus_temporibus_temporarium_2"

done

Expand Down

0 comments on commit 28045bc

Please sign in to comment.