
Commit

999999999_521850.py (#43): HXLTM wide near working; No1 (RDF ttl) generate data (but incomplete)
fititnt committed Aug 3, 2022
1 parent fceb3a7 commit 7beef2e
Showing 3 changed files with 127 additions and 75 deletions.
63 changes: 32 additions & 31 deletions officina/999999999/0/999999999_521850.py
@@ -1153,39 +1153,25 @@ def de_hxltm_ad_hxltm_wide(
"""

# data_sorted = self._data_sort(fonti)
data_sorted = hxltm__data_sort(
fonti, ['#item+rem+i_qcc+is_zxxx+ix_xyadhxltrivio'])

if '#item+rem+i_qcc+is_zxxx+ix_xyadhxltrivio' in self._caput:
data_sorted = hxltm__data_sort(
fonti, ['#item+rem+i_qcc+is_zxxx+ix_xyadhxltrivio'])
else:
data_sorted = hxltm__data_sort(fonti)

caput, data = hxltm__data_pivot_wide(data_sorted[0], data_sorted[1:])

# print(data_sorted[0:10])
print(caput, data[0:10])
# print(caput, data[0:10])

raise NotImplementedError
# raise NotImplementedError

with open(objetivum, 'w') as _objetivum:
# with open(fonti, 'r') as _fons:
_csv_writer = csv.writer(_objetivum)
for linea in data_sorted:
# _csv_reader = csv.reader(_fons)
# _csv_writer = csv.writer(_objetivum)
started = False
# for linea in _csv_reader:
if not started:
started = True
caput = linea
# caput = self._no1lize(linea)
# if '#item+conceptum+numerordinatio' not in caput:
# numerordinatio_inconito = True
# codicem_index = caput.index(
# '#item+conceptum+codicem')
# caput.insert(0, '#item+conceptum+numerordinatio')
self._caput = caput
_csv_writer.writerow(caput)
continue
# if numerordinatio_inconito is True:
# linea.insert(0, '{0}:{1}'.format(
# self.numerordinatio_praefixo, linea[codicem_index]))
_csv_writer.writerow(caput)
for linea in data:
_csv_writer.writerow(linea)

def de_hxltm_ad_no1(self, fonti: str, objetivum: str):
@@ -1587,21 +1573,36 @@ def praeparatio(self):
self._temp['hxl'], self._temp['hxltm'], hxl_vocab=hxl_vocab
)

if self.objectivum_formato in ['hxltm-wide', 'no1'] or \
(self.objectivum_formato == 'no1' and
'#item+rem+i_qcc+is_zxxx+ix_xyadhxltrivio' in self._caput):
if self.objectivum_formato == 'hxltm-wide':
hxl_vocab = False
if self.methodus == 'health':
self._hxlPivot = DATA_HXL_DE_CSV_REGEX['worldbank']
hxl_vocab = True
# if self.methodus == 'health':
# self._hxlPivot = DATA_HXL_DE_CSV_REGEX['worldbank']
# hxl_vocab = True
self.de_hxltm_ad_hxltm_wide(
self._temp['hxltm'], self._temp['hxltm_wide']
)

# We also generate wide data implicitly if the output format needs it
if self.objectivum_formato == 'no1' and \
('#item+rem+i_qcc+is_zxxx+ix_xyadhxltrivio' in self._caput):
hxl_vocab = False
# if self.methodus == 'health':
# self._hxlPivot = DATA_HXL_DE_CSV_REGEX['worldbank']
# hxl_vocab = True
self.de_hxltm_ad_hxltm_wide(
self._temp['hxltm'], self._temp['hxltm_wide']
)

if self.objectivum_formato in ['no1']:
self.de_hxltm_ad_no1(
self._temp['hxltm_wide'], self._temp['no1']
)

elif self.objectivum_formato in ['no1']:
self.de_hxltm_ad_no1(
self._temp['hxltm'], self._temp['no1']
)
else:
raise SyntaxError('{}??'.format(self.objectivum_formato))


if __name__ == "__main__":
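Taken together, the de_hxltm_ad_hxltm_wide hunk above sorts the long-format HXLTM rows (by the #item+rem+i_qcc+is_zxxx+ix_xyadhxltrivio column when that hashtag is present), pivots them with hxltm__data_pivot_wide, and writes the new header followed by the pivoted rows. Below is a minimal sketch of that write path, not the repository code: it assumes the rows are already in memory and uses hypothetical sort_long_rows / pivot_wide callables standing in for hxltm__data_sort and hxltm__data_pivot_wide.

import csv
from typing import Callable, List

def write_wide_csv(rows: List[list], objetivum: str, sort_key: str,
                   sort_long_rows: Callable, pivot_wide: Callable) -> None:
    # rows[0] is the HXL header (caput); the rest are long-format data rows
    caput = rows[0]
    if sort_key in caput:
        rows_sorted = sort_long_rows(rows, [sort_key])
    else:
        rows_sorted = sort_long_rows(rows)

    # Pivot the sorted body against the header, then persist the result
    caput_novo, data_novo = pivot_wide(rows_sorted[0], rows_sorted[1:])
    with open(objetivum, 'w', newline='') as _objetivum:
        _csv_writer = csv.writer(_objetivum)
        _csv_writer.writerow(caput_novo)   # wide header first
        _csv_writer.writerows(data_novo)   # then one row per concept code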
98 changes: 54 additions & 44 deletions officina/999999999/0/L999999999_0.py
@@ -5470,6 +5470,9 @@ def hxltm__data_pivot_wide(caput: list, data: list) -> list:

referens_hxlattrs = sorted(referens_hxlattrs)


# raise ValueError(len(referens_hxlattrs) * len(referens_ad_indici))

columna_novae__list = []
columna_novae__mapping = {}

@@ -5481,13 +5484,19 @@ def hxltm__data_pivot_wide(caput: list, data: list) -> list:
if res in columna_novae__list:
continue
columna_novae__list.append(res)
# columna_novae__mapping[item_I] = columna_novae__list.index(res)
columna_novae__mapping[item_I] = len(columna_novae__list) - 1
columna_novae__mapping[item_I] = columna_novae__list.index(res)
# columna_novae__mapping[item_I] = len(columna_novae__list) - 1
# pass
# columna_novae__list

data_novae__dict = {}
_codice_indici = caput.index('#item+conceptum+codicem')
_matrix_size = len(referens_hxlattrs) * len(referens_ad_indici)
# raise ValueError(_matrix_size, [''] * _matrix_size)

_do_not_merge = []
# These columns will already be present in the data
_do_not_merge.extend(referens_ad_indici)
for linea in data:
_codicem = linea[_codice_indici]
_referens = linea[referens_per_indici]
@@ -5497,67 +5506,68 @@ def hxltm__data_pivot_wide(caput: list, data: list) -> list:
data_novae__dict[_codicem] = {
'originalis': linea,
# 'data_novae': [''] * len(columna_novae__list),
'data_novae': [''] * (len(columna_novae__list) + 1),
# 'data_novae': [''] * (len(columna_novae__list) + 1),
'data_novae': [''] * _matrix_size,
'data_meta': [],
}
else:
# Compare values to check which columns are identical across rows for the
# same code and therefore can be safely merged
for item_index, item_value in enumerate(linea):
if item_index in _do_not_merge:
continue
if item_value != \
data_novae__dict[_codicem]['originalis'][item_index]:
_do_not_merge.append(item_index)

__loop = 0
# __loop = -1
# print(len(data_novae__dict[_codicem]['data_novae']))
# raise NotImplementedError(referens_ad_indici)
for index_originalis in referens_ad_indici:
# index_novae = __start + index_originalis
# print(__loop, len(data_novae__dict[_codicem]['data_novae']))
index_novae = __start + __loop
if index_novae not in data_novae__dict[_codicem]['data_novae']:
print('error', index_novae, len(data_novae__dict[_codicem]['data_novae']))
continue
if not index_novae < len(data_novae__dict[_codicem]['data_novae']):
break
# if index_novae not in data_novae__dict[_codicem]['data_novae']:
# print('error', __start, index_novae, len(data_novae__dict[_codicem]['data_novae']))
# continue
# print('antes', index_novae, len(data_novae__dict[_codicem]['data_novae']))
data_novae__dict[_codicem]['data_novae'][index_novae] = \
linea[index_originalis]
# print('depois', index_novae)
# data_novae__dict[_codicem]['data_novae'][index_originalis] = \
# linea[index_originalis]
__loop += 1

for codicem in data_novae__dict:
for _old_index, _old_value in enumerate(data_novae__dict[codicem]['originalis']):
if _old_index not in _do_not_merge:
data_novae__dict[codicem]['data_meta'].append(_old_value)

# for item in caput:
# for item_II in referens_ad_indici:

# pass
# raise ValueError(_old_index)
_caput_novo_meta = []
for _old_index, _old_caput in enumerate(caput):
if _old_index not in _do_not_merge:
_caput_novo_meta.append(_old_caput)

raise NotImplementedError(data_novae__dict['4'])
raise NotImplementedError(columna_novae__list, columna_novae__mapping)

# if not sortkeys:
# sortkeys = []

# if '#item+conceptum+codicem' not in sortkeys:
# sortkeys.insert(0, '#item+conceptum+codicem')

# _data = []
# caput = []
# with open(fonti, 'r') as _fons:
# _csv_reader = csv.reader(_fons)
# # started = False
# for linea in _csv_reader:
# if len(caput) == 0:
# caput = linea
# continue
# _data.append(linea)

# _i0 = caput.index(sortkeys[0])
# if len(sortkeys) == 1:
# _data = sorted(_data, key=lambda row: int(row[_i0]))
# elif len(sortkeys) == 2:
# _i1 = caput.index(sortkeys[1])
# _data = sorted(_data, key=lambda row: (int(row[_i0]), row[_i1]))
# elif len(sortkeys) == 3:
# _i1 = caput.index(sortkeys[1])
# _i2 = caput.index(sortkeys[2])
# _data = sorted(
# _data, key=lambda row: (int(row[_i0]), row[_i1], row[_i2]))
# else:
# raise NotImplementedError('len > 3; [{}] <{}>'.format(
# len(sortkeys), sortkeys))
caput_novo = _caput_novo_meta + columna_novae__list
data_novo = []
for codicem in data_novae__dict:
data_novo.append(data_novae__dict[codicem]['data_meta'] \
+ data_novae__dict[codicem]['data_novae'])

# resultatum = []
# resultatum.append(caput)
# resultatum.extend(_data)
# raise NotImplementedError(caput_novo, data_novo[0])
# raise NotImplementedError(data_novae__dict['4'])
# raise NotImplementedError(columna_novae__list, columna_novae__mapping)

return caput, data

return caput_novo, data_novo


def hxltm__concat(
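The hxltm__data_pivot_wide hunk above is the long-to-wide pivot itself: rows sharing a #item+conceptum+codicem value collapse into one wide row, columns whose values never differ within such a group are kept once as meta columns (the _do_not_merge bookkeeping), and each combination of pivot value and value column becomes a new wide column. The following compact sketch illustrates that pattern; the parameter names and the '+' suffix convention for the new column names are illustrative, not the repository's exact output.

from collections import OrderedDict
from typing import List, Tuple

def pivot_wide(caput: List[str], data: List[list], codice: str,
               pivot: str, value_cols: List[str]) -> Tuple[list, list]:
    i_code = caput.index(codice)
    i_pivot = caput.index(pivot)
    i_values = [caput.index(c) for c in value_cols]

    pivot_values = sorted({row[i_pivot] for row in data})
    new_cols = ['{0}+{1}'.format(col, pv)
                for pv in pivot_values for col in value_cols]

    # Group long rows by concept code, preserving first-seen order
    groups = OrderedDict()
    for row in data:
        groups.setdefault(row[i_code], []).append(row)

    # A column stays as a meta column only if its value never differs
    # inside any group (the sketch's analogue of _do_not_merge)
    varying = set(i_values + [i_pivot])
    for rows in groups.values():
        for idx in range(len(caput)):
            if any(r[idx] != rows[0][idx] for r in rows):
                varying.add(idx)
    meta_idx = [i for i in range(len(caput)) if i not in varying]

    caput_novo = [caput[i] for i in meta_idx] + new_cols
    data_novo = []
    for rows in groups.values():
        wide = [''] * len(new_cols)
        for r in rows:
            base = pivot_values.index(r[i_pivot]) * len(value_cols)
            for k, i_val in enumerate(i_values):
                wide[base + k] = r[i_val]
        data_novo.append([rows[0][i] for i in meta_idx] + wide)
    return caput_novo, data_novo

The final concatenation mirrors the hunk's own ending: the meta columns plus the new wide columns form both the header (caput_novo = _caput_novo_meta + columna_novae__list) and each wide data row (data_meta + data_novae).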
41 changes: 41 additions & 0 deletions officina/999999999/999999_17.sh
@@ -76,13 +76,17 @@ set -x
--objectivum-formato=hxltm \
>"999999/0/1603_992_1_0~worldbank~SP_POP_TOTL.tm.hxl.csv"

frictionless validate "999999/0/1603_992_1_0~worldbank~SP_POP_TOTL.tm.hxl.csv"

./999999999/0/999999999_521850.py \
--methodus-fonti=worldbank \
--methodus="SP.POP.TOTL" \
--objectivum-formato=no1 \
--numerordinatio-praefixo="1603_992_1_0" \
>"999999/0/1603_992_1_0~worldbank~SP_POP_TOTL.no1.tm.hxl.csv"

frictionless validate "999999/0/1603_992_1_0~worldbank~SP_POP_TOTL.no1.tm.hxl.csv"

./999999999/0/999999999_54872.py \
--methodus=_temp_no1 \
--rdf-sine-spatia-nominalibus=devnull,mdciii \
@@ -102,18 +106,55 @@ rdfpipe --input-format=turtle --output-format=longturtle \
--objectivum-formato=hxl \
>"999999/0/1603_99966_1_0~worldbank~health.hxl.csv"

frictionless validate "999999/0/1603_99966_1_0~worldbank~health.hxl.csv"

./999999999/0/999999999_521850.py \
--methodus-fonti=worldbank \
--methodus="health" \
--objectivum-formato=hxltm \
>"999999/0/1603_99966_1_0~worldbank~health.tm.hxl.csv"

frictionless validate "999999/0/1603_99966_1_0~worldbank~health.tm.hxl.csv"

./999999999/0/999999999_521850.py \
--methodus-fonti=worldbank \
--methodus="health" \
--objectivum-formato=hxltm-wide \
>"999999/0/1603_99966_1_0~worldbank~health~wide.tm.hxl.csv"

frictionless validate "999999/0/1603_99966_1_0~worldbank~health~wide.tm.hxl.csv"

./999999999/0/999999999_521850.py \
--methodus-fonti=worldbank \
--methodus="SP.POP.TOTL" \
--objectivum-formato=no1 \
--numerordinatio-praefixo="1603_992_1_0" \
>"999999/0/1603_99966_1_0~worldbank~health~wide.no1.tm.hxl.csv"


frictionless validate "999999/0/1603_99966_1_0~worldbank~health~wide.no1.tm.hxl.csv"

# @TODO add other pivots
# ./999999999/0/999999999_54872.py \
# --methodus=_temp_no1 \
# --rdf-sine-spatia-nominalibus=devnull,mdciii \
# --rdf-trivio=1603 \
# --rdf-per-trivio='iso8601v' \
# "999999/0/1603_99966_1_0~worldbank~health~wide.no1.tm.hxl.csv" \
# >"999999/0/1603_99966_1_0~worldbank~health~TEMP.no1.owl.ttl"

./999999999/0/999999999_54872.py \
--methodus=_temp_no1 \
--rdf-sine-spatia-nominalibus=devnull,mdciii \
--rdf-trivio=1603 \
--rdf-per-trivio='iso8601v,ix_xywdatap2899v,ix_xywdatap4135v' \
"999999/0/1603_99966_1_0~worldbank~health~wide.no1.tm.hxl.csv" \
>"999999/0/1603_99966_1_0~worldbank~health~TEMP.no1.owl.ttl"

rdfpipe --input-format=turtle --output-format=longturtle \
"999999/0/1603_99966_1_0~worldbank~health~TEMP.no1.owl.ttl"\
>"999999/0/1603_99966_1_0~worldbank~health.no1.owl.ttl"

# ./999999999/0/999999999_54872.py --methodus=_temp_no1 --rdf-sine-spatia-nominalibus=devnull,mdciii --rdf-trivio=1603 999999/0/1603_992_1_0~worldbank~SP_POP_TOTL.no1.tm.hxl.csv | head -n 20
# head -n 3 999999/0/1603_992_1_0~worldbank~SP_POP_TOTL.no1.tm.hxl.csv

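The script validates each generated CSV with the frictionless CLI immediately after producing it. If more pivots are added (see the @TODO above), the same checks could be looped from Python via the frictionless package's validate() API; a small sketch, assuming frictionless-py is installed and using output paths the script already produces:

from frictionless import validate

# Outputs produced earlier in this script; extend as new pivots are added
outputs = [
    "999999/0/1603_99966_1_0~worldbank~health.tm.hxl.csv",
    "999999/0/1603_99966_1_0~worldbank~health~wide.tm.hxl.csv",
    "999999/0/1603_99966_1_0~worldbank~health~wide.no1.tm.hxl.csv",
]
for path in outputs:
    report = validate(path)
    print(path, 'valid' if report.valid else 'INVALID')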
