diff --git a/officina/999999999/0/999999999_521850.py b/officina/999999999/0/999999999_521850.py index b1eb663..e9ecd51 100755 --- a/officina/999999999/0/999999999_521850.py +++ b/officina/999999999/0/999999999_521850.py @@ -1153,39 +1153,25 @@ def de_hxltm_ad_hxltm_wide( """ # data_sorted = self._data_sort(fonti) - data_sorted = hxltm__data_sort( - fonti, ['#item+rem+i_qcc+is_zxxx+ix_xyadhxltrivio']) + + if '#item+rem+i_qcc+is_zxxx+ix_xyadhxltrivio' in self._caput: + data_sorted = hxltm__data_sort( + fonti, ['#item+rem+i_qcc+is_zxxx+ix_xyadhxltrivio']) + else: + data_sorted = hxltm__data_sort(fonti) caput, data = hxltm__data_pivot_wide(data_sorted[0], data_sorted[1:]) # print(data_sorted[0:10]) - print(caput, data[0:10]) + # print(caput, data[0:10]) - raise NotImplementedError + # raise NotImplementedError with open(objetivum, 'w') as _objetivum: # with open(fonti, 'r') as _fons: _csv_writer = csv.writer(_objetivum) - for linea in data_sorted: - # _csv_reader = csv.reader(_fons) - # _csv_writer = csv.writer(_objetivum) - started = False - # for linea in _csv_reader: - if not started: - started = True - caput = linea - # caput = self._no1lize(linea) - # if '#item+conceptum+numerordinatio' not in caput: - # numerordinatio_inconito = True - # codicem_index = caput.index( - # '#item+conceptum+codicem') - # caput.insert(0, '#item+conceptum+numerordinatio') - self._caput = caput - _csv_writer.writerow(caput) - continue - # if numerordinatio_inconito is True: - # linea.insert(0, '{0}:{1}'.format( - # self.numerordinatio_praefixo, linea[codicem_index])) + _csv_writer.writerow(caput) + for linea in data: _csv_writer.writerow(linea) def de_hxltm_ad_no1(self, fonti: str, objetivum: str): @@ -1587,21 +1573,36 @@ def praeparatio(self): self._temp['hxl'], self._temp['hxltm'], hxl_vocab=hxl_vocab ) - if self.objectivum_formato in ['hxltm-wide', 'no1'] or \ - (self.objectivum_formato == 'no1' and - '#item+rem+i_qcc+is_zxxx+ix_xyadhxltrivio' in self._caput): + if self.objectivum_formato == 'hxltm-wide': hxl_vocab = False - if self.methodus == 'health': - self._hxlPivot = DATA_HXL_DE_CSV_REGEX['worldbank'] - hxl_vocab = True + # if self.methodus == 'health': + # self._hxlPivot = DATA_HXL_DE_CSV_REGEX['worldbank'] + # hxl_vocab = True + self.de_hxltm_ad_hxltm_wide( + self._temp['hxltm'], self._temp['hxltm_wide'] + ) + + # We also generate wide data implicitly if result needs it + if self.objectivum_formato == 'no1' and \ + ('#item+rem+i_qcc+is_zxxx+ix_xyadhxltrivio' in self._caput): + hxl_vocab = False + # if self.methodus == 'health': + # self._hxlPivot = DATA_HXL_DE_CSV_REGEX['worldbank'] + # hxl_vocab = True self.de_hxltm_ad_hxltm_wide( self._temp['hxltm'], self._temp['hxltm_wide'] ) - if self.objectivum_formato in ['no1']: + self.de_hxltm_ad_no1( + self._temp['hxltm_wide'], self._temp['no1'] + ) + + elif self.objectivum_formato in ['no1']: self.de_hxltm_ad_no1( self._temp['hxltm'], self._temp['no1'] ) + else: + SyntaxError('{}??'.format(self.objectivum_formato)) if __name__ == "__main__": diff --git a/officina/999999999/0/L999999999_0.py b/officina/999999999/0/L999999999_0.py index a9feba9..dc4a5fd 100644 --- a/officina/999999999/0/L999999999_0.py +++ b/officina/999999999/0/L999999999_0.py @@ -5470,6 +5470,9 @@ def hxltm__data_pivot_wide(caput: list, data: list) -> list: sorted(referens_hxlattrs) + + # raise ValueError(len(referens_hxlattrs) * len(referens_ad_indici)) + columna_novae__list = [] columna_novae__mapping = {} @@ -5481,13 +5484,19 @@ def hxltm__data_pivot_wide(caput: list, data: list) -> list: if res in columna_novae__list: continue columna_novae__list.append(res) - # columna_novae__mapping[item_I] = columna_novae__list.index(res) - columna_novae__mapping[item_I] = len(columna_novae__list) - 1 + columna_novae__mapping[item_I] = columna_novae__list.index(res) + # columna_novae__mapping[item_I] = len(columna_novae__list) - 1 # pass # columna_novae__list data_novae__dict = {} _codice_indici = caput.index('#item+conceptum+codicem') + _matrix_size = len(referens_hxlattrs) * len(referens_ad_indici) + # raise ValueError(_matrix_size, [''] * _matrix_size) + + _do_not_merge = [] + # These already will be on data + _do_not_merge.extend(referens_ad_indici) for linea in data: _codicem = linea[_codice_indici] _referens = linea[referens_per_indici] @@ -5497,67 +5506,68 @@ def hxltm__data_pivot_wide(caput: list, data: list) -> list: data_novae__dict[_codicem] = { 'originalis': linea, # 'data_novae': [''] * len(columna_novae__list), - 'data_novae': [''] * (len(columna_novae__list) + 1), + # 'data_novae': [''] * (len(columna_novae__list) + 1), + 'data_novae': [''] * _matrix_size, + 'data_meta': [], } + else: + # This will try to compare if we can safely add columns that would + # be equal anyway + for item_index, item_value in enumerate(linea): + if item_index in _do_not_merge: + continue + if item_value != \ + data_novae__dict[_codicem]['originalis'][item_index]: + _do_not_merge.append(item_index) __loop = 0 + # __loop = -1 # print(len(data_novae__dict[_codicem]['data_novae'])) + # raise NotImplementedError(referens_ad_indici) for index_originalis in referens_ad_indici: # index_novae = __start + index_originalis # print(__loop, len(data_novae__dict[_codicem]['data_novae'])) index_novae = __start + __loop - if index_novae not in data_novae__dict[_codicem]['data_novae']: - print('error', index_novae, len(data_novae__dict[_codicem]['data_novae'])) - continue + if not index_novae < len(data_novae__dict[_codicem]['data_novae']): + break + # if index_novae not in data_novae__dict[_codicem]['data_novae']: + # print('error', __start, index_novae, len(data_novae__dict[_codicem]['data_novae'])) + # continue + # print('antes', index_novae, len(data_novae__dict[_codicem]['data_novae'])) data_novae__dict[_codicem]['data_novae'][index_novae] = \ linea[index_originalis] + # print('depois', index_novae) + # data_novae__dict[_codicem]['data_novae'][index_originalis] = \ + # linea[index_originalis] __loop += 1 + for codicem in data_novae__dict: + for _old_index, _old_value in enumerate(data_novae__dict[codicem]['originalis']): + if _old_index not in _do_not_merge: + data_novae__dict[codicem]['data_meta'].append(_old_value) + # for item in caput: # for item_II in referens_ad_indici: # pass + # raise ValueError(_old_index) + _caput_novo_meta = [] + for _old_index, _old_caput in enumerate(caput): + if _old_index not in _do_not_merge: + _caput_novo_meta.append(_old_caput) - raise NotImplementedError(data_novae__dict['4']) - raise NotImplementedError(columna_novae__list, columna_novae__mapping) - - # if not sortkeys: - # sortkeys = [] - - # if '#item+conceptum+codicem' not in sortkeys: - # sortkeys.insert(0, '#item+conceptum+codicem') - - # _data = [] - # caput = [] - # with open(fonti, 'r') as _fons: - # _csv_reader = csv.reader(_fons) - # # started = False - # for linea in _csv_reader: - # if len(caput) == 0: - # caput = linea - # continue - # _data.append(linea) - - # _i0 = caput.index(sortkeys[0]) - # if len(sortkeys) == 1: - # _data = sorted(_data, key=lambda row: int(row[_i0])) - # elif len(sortkeys) == 2: - # _i1 = caput.index(sortkeys[1]) - # _data = sorted(_data, key=lambda row: (int(row[_i0]), row[_i1])) - # elif len(sortkeys) == 3: - # _i1 = caput.index(sortkeys[1]) - # _i2 = caput.index(sortkeys[2]) - # _data = sorted( - # _data, key=lambda row: (int(row[_i0]), row[_i1], row[_i2])) - # else: - # raise NotImplementedError('len > 3; [{}] <{}>'.format( - # len(sortkeys), sortkeys)) + caput_novo = _caput_novo_meta + columna_novae__list + data_novo = [] + for codicem in data_novae__dict: + data_novo.append(data_novae__dict[codicem]['data_meta'] \ + + data_novae__dict[_codicem]['data_novae']) - # resultatum = [] - # resultatum.append(caput) - # resultatum.extend(_data) + # raise NotImplementedError(caput_novo, data_novo[0]) + # raise NotImplementedError(data_novae__dict['4']) + # raise NotImplementedError(columna_novae__list, columna_novae__mapping) - return caput, data + + return caput_novo, data_novo def hxltm__concat( diff --git a/officina/999999999/999999_17.sh b/officina/999999999/999999_17.sh index ff4b20c..c44c4e4 100755 --- a/officina/999999999/999999_17.sh +++ b/officina/999999999/999999_17.sh @@ -76,6 +76,8 @@ set -x --objectivum-formato=hxltm \ >"999999/0/1603_992_1_0~worldbank~SP_POP_TOTL.tm.hxl.csv" +frictionless validate "999999/0/1603_992_1_0~worldbank~SP_POP_TOTL.tm.hxl.csv" + ./999999999/0/999999999_521850.py \ --methodus-fonti=worldbank \ --methodus="SP.POP.TOTL" \ @@ -83,6 +85,8 @@ set -x --numerordinatio-praefixo="1603_992_1_0" \ >"999999/0/1603_992_1_0~worldbank~SP_POP_TOTL.no1.tm.hxl.csv" +frictionless validate "999999/0/1603_992_1_0~worldbank~SP_POP_TOTL.no1.tm.hxl.csv" + ./999999999/0/999999999_54872.py \ --methodus=_temp_no1 \ --rdf-sine-spatia-nominalibus=devnull,mdciii \ @@ -102,18 +106,55 @@ rdfpipe --input-format=turtle --output-format=longturtle \ --objectivum-formato=hxl \ >"999999/0/1603_99966_1_0~worldbank~health.hxl.csv" +frictionless validate "999999/0/1603_99966_1_0~worldbank~health.hxl.csv" + ./999999999/0/999999999_521850.py \ --methodus-fonti=worldbank \ --methodus="health" \ --objectivum-formato=hxltm \ >"999999/0/1603_99966_1_0~worldbank~health.tm.hxl.csv" +frictionless validate "999999/0/1603_99966_1_0~worldbank~health.tm.hxl.csv" + ./999999999/0/999999999_521850.py \ --methodus-fonti=worldbank \ --methodus="health" \ --objectivum-formato=hxltm-wide \ >"999999/0/1603_99966_1_0~worldbank~health~wide.tm.hxl.csv" +frictionless validate "999999/0/1603_99966_1_0~worldbank~health~wide.tm.hxl.csv" + +./999999999/0/999999999_521850.py \ + --methodus-fonti=worldbank \ + --methodus="SP.POP.TOTL" \ + --objectivum-formato=no1 \ + --numerordinatio-praefixo="1603_992_1_0" \ + >"999999/0/1603_99966_1_0~worldbank~health~wide.no1.tm.hxl.csv" + + +frictionless validate "999999/0/1603_99966_1_0~worldbank~health~wide.no1.tm.hxl.csv" + +# @TODO add other pivots +# ./999999999/0/999999999_54872.py \ +# --methodus=_temp_no1 \ +# --rdf-sine-spatia-nominalibus=devnull,mdciii \ +# --rdf-trivio=1603 \ +# --rdf-per-trivio='iso8601v' \ +# "999999/0/1603_99966_1_0~worldbank~health~wide.no1.tm.hxl.csv" \ +# >"999999/0/1603_99966_1_0~worldbank~health~TEMP.no1.owl.ttl" + +./999999999/0/999999999_54872.py \ + --methodus=_temp_no1 \ + --rdf-sine-spatia-nominalibus=devnull,mdciii \ + --rdf-trivio=1603 \ + --rdf-per-trivio='iso8601v,ix_xywdatap2899v,ix_xywdatap4135v' \ + "999999/0/1603_99966_1_0~worldbank~health~wide.no1.tm.hxl.csv" \ + >"999999/0/1603_99966_1_0~worldbank~health~TEMP.no1.owl.ttl" + +rdfpipe --input-format=turtle --output-format=longturtle \ + "999999/0/1603_99966_1_0~worldbank~health~TEMP.no1.owl.ttl"\ + >"999999/0/1603_99966_1_0~worldbank~health.no1.owl.ttl" + # ./999999999/0/999999999_54872.py --methodus=_temp_no1 --rdf-sine-spatia-nominalibus=devnull,mdciii --rdf-trivio=1603 999999/0/1603_992_1_0~worldbank~SP_POP_TOTL.no1.tm.hxl.csv | head -n 20 # head -n 3 999999/0/1603_992_1_0~worldbank~SP_POP_TOTL.no1.tm.hxl.csv