diff --git a/officina/999999999/0/999999999_521850.py b/officina/999999999/0/999999999_521850.py index db718a2..a7ba033 100755 --- a/officina/999999999/0/999999999_521850.py +++ b/officina/999999999/0/999999999_521850.py @@ -28,25 +28,42 @@ import sys import argparse import csv -import re +# import re from pathlib import Path from os.path import exists -from functools import reduce +# from functools import reduce from typing import ( Any, # Dict, # List, ) +import zipfile from L999999999_0 import ( # hxltm_carricato, - TabulaAdHXLTM + NUMERORDINATIO_BASIM, + # TabulaAdHXLTM ) -import yaml +import requests -import xml.etree.ElementTree as XMLElementTree +try: + from openpyxl import ( + load_workbook + ) +except ModuleNotFoundError: + # Error handling + pass +try: + import xlrd +except ModuleNotFoundError: + # Error handling + pass + +# import yaml + +# import xml.etree.ElementTree as XMLElementTree STDIN = sys.stdin.buffer @@ -66,15 +83,24 @@ ------------------------------------------------------------------------------ EXEMPLŌRUM GRATIĀ ------------------------------------------------------------------------------ - {0} --methodus=undata + {0} --methodus-fonti=undata + + {0} --methodus-fonti=unhcr - {0} --methodus=unhcr + {0} --methodus-fonti=unochafts - {0} --methodus=unochafts + {0} --methodus-fonti=unwpf - {0} --methodus=unwpf + {0} --methodus-fonti=worldbank --methodus=SP.POP.TOTL - {0} --methodus=worldbank + {0} --methodus-fonti=worldbank --methodus=SP.POP.TOTL \ +--objectivum-formato=link-fonti + + {0} --methodus-fonti=worldbank --methodus=SP.POP.TOTL \ +--objectivum-formato=csv + + {0} --methodus-fonti=worldbank --methodus=SP.POP.TOTL \ +--objectivum-formato=hxltm ------------------------------------------------------------------------------ EXEMPLŌRUM GRATIĀ @@ -151,30 +177,7 @@ def __init__(self): Constructs all the necessary attributes for the Cli object. 
""" - def _quod_configuratio(self, archivum_configurationi: str = None) -> dict: - """_quod_configuratio - - Args: - archivum_configurationi (str, optional): - - Returns: - (dict): - """ - archivae = ARCHIVUM_CONFIGURATIONI_DEFALLO - if archivum_configurationi is not None: - if not exists(archivum_configurationi): - raise FileNotFoundError( - 'archivum_configurationi {0}'.format( - archivum_configurationi)) - archivae.append(archivum_configurationi) - - for item in archivae: - if exists(item): - with open(item, "r") as read_file: - datum = yaml.safe_load(read_file) - return datum - - def make_args(self, hxl_output=True): + def make_args(self): # parser = argparse.ArgumentParser(description=DESCRIPTION) parser = argparse.ArgumentParser( prog="999999999_10263485", @@ -190,9 +193,9 @@ def make_args(self, hxl_output=True): ) parser.add_argument( - '--methodus', - help='Modo de operação.', - dest='methodus', + '--methodus-fonti', + help='External data source', + dest='methodus_fonti', nargs='?', choices=[ 'undata', # https://data.un.org/ @@ -206,6 +209,14 @@ def make_args(self, hxl_output=True): default='undata' ) + parser.add_argument( + '--methodus', + help='Underlining method for the data source', + dest='methodus', + nargs='?', + # default=None + default='help' + ) # objectīvum, n, s, nominativus, # https://en.wiktionary.org/wiki/objectivus#Latin # fōrmātō, n, s, dativus, https://en.wiktionary.org/wiki/formatus#Latin @@ -216,11 +227,13 @@ def make_args(self, hxl_output=True): nargs='?', choices=[ 'csv', - 'tsv', - 'hxl_csv', - 'hxl_tsv', - 'hxltm_csv', - 'hxltm_tsv', + 'hxltm', + 'link-fonti', + # 'tsv', + # 'hxl_csv', + # 'hxl_tsv', + # 'hxltm_csv', + # 'hxltm_tsv', ], # required=True default='csv' @@ -276,31 +289,192 @@ def execute_cli(self, pyargs, stdin=STDIN, stdout=sys.stdout, # hf = CliMain(self.pyargs.infile, self.pyargs.outfile) - if pyargs.methodus == 'undata': - print(DATA_SCRAPPING_HELP['UNDATA']) + if pyargs.methodus_fonti == 'undata': + if 
class DataScrapping:
    """Base class with CSV helpers shared by the data-scraping sources.

    The helpers copy a CSV file while discarding every row that precedes the
    real header row, optionally replacing that header with draft HXL hashtags.
    """

    # 'utf-8-sig' decodes plain UTF-8 and additionally drops a leading
    # byte-order mark when one is present, so it is safe for both cases.
    _CODEX_LECTIONI = 'utf-8-sig'
    _CODEX_SCRIPTIONI = 'utf-8'

    def _hxlize_dummy(self, caput: list) -> list:
        """Turn plain header cells into placeholder HXL hashtags (draft).

        Args:
            caput (list): header cells, as strings.

        Returns:
            list: one item per cell. Empty (or whitespace-only) cells stay
            ''; any other cell becomes '#meta+' followed by the cell
            lower-cased, stripped, with spaces removed and '-' replaced
            by '_'.
        """
        resultatum = []
        for res in caput:
            nomen = res.strip() if res else ''
            if not nomen:
                # A blank cell must not become the dangling tag '#meta+'
                resultatum.append('')
                continue
            resultatum.append('#meta+{0}'.format(
                nomen.lower().replace(' ', '').replace('-', '_')))
        return resultatum

    @staticmethod
    def _est_caput(linea: list, caput_initiali: list) -> bool:
        """Tell if a CSV row is the header: first cell in caput_initiali."""
        return bool(linea) and linea[0].strip() in caput_initiali

    def _transcribere(
            self, fonti: str, objetivum: str, caput_initiali: list,
            hxlize: bool = False) -> None:
        """Copy the header row and all rows after it from fonti to objetivum.

        Rows before the header are dropped. If no row matches, the target
        ends up empty.

        Args:
            fonti (str): path of the CSV to read.
            objetivum (str): path of the CSV to write (overwritten).
            caput_initiali (list): accepted values for the first header cell.
            hxlize (bool): when True the header row is written as the output
                of _hxlize_dummy() instead of the original header text.
        """
        # The source is opened first so a missing source raises before the
        # target is created/truncated. newline='' is what the csv module
        # requires for files handed to csv.reader / csv.writer.
        with open(fonti, 'r', newline='',
                  encoding=self._CODEX_LECTIONI) as _fons, \
                open(objetivum, 'w', newline='',
                     encoding=self._CODEX_SCRIPTIONI) as _objetivum:
            _csv_writer = csv.writer(_objetivum)
            started = False
            for linea in csv.reader(_fons):
                if started:
                    _csv_writer.writerow(linea)
                elif self._est_caput(linea, caput_initiali):
                    started = True
                    _csv_writer.writerow(
                        self._hxlize_dummy(linea) if hxlize else linea)

    def de_csv_ad_csvnorm(
            self, fonti: str, objetivum: str, caput_initiali: list):
        """Write a normalized CSV: the header row plus everything after it.

        Args:
            fonti (str): path of the raw CSV.
            objetivum (str): path of the normalized CSV to write.
            caput_initiali (list): accepted values for the first header cell.
        """
        self._transcribere(fonti, objetivum, caput_initiali, hxlize=False)

    def de_csv_ad_hxltm(
            self, fonti: str, objetivum: str, caput_initiali: list):
        """Write a draft HXLTM CSV.

        Same as de_csv_ad_csvnorm(), except that the header row is replaced
        (not followed) by the placeholder hashtags of _hxlize_dummy().

        Args:
            fonti (str): path of the CSV to read.
            objetivum (str): path of the HXLTM CSV to write.
            caput_initiali (list): accepted values for the first header cell.
        """
        # @TODO remove this draft part (placeholder hashtags)
        self._transcribere(fonti, objetivum, caput_initiali, hxlize=True)


class DataScrappingWorldbank(DataScrapping):
    """Download one World Bank indicator as CSV and print it.

    Typical use, as done by the CLI: construct, call praeparatio(), then
    imprimere().
    """

    # Download link pattern; {0} is the indicator code. The earlier draft
    # also referenced '?downloadformat=excel'; only the CSV (zipped) variant
    # is handled by this class.
    _LINK_FONTI_FORMA = (
        'https://api.worldbank.org/v2/en/indicator/{0}'
        '?downloadformat=csv'
    )

    methodus: str = 'SP.POP.TOTL'
    objectivum_formato: str = 'csv'
    link_fonti: str = _LINK_FONTI_FORMA.format(methodus)
    temp_fonti_csv: str = ''
    temp_fonti_hxltm: str = ''

    def __init__(self, methodus: str, objectivum_formato: str):
        """Store the options and derive the download link.

        Args:
            methodus (str): indicator code (example: 'SP.POP.TOTL'). An empty
                value or 'help' (the CLI default) keeps the class default
                link.
            objectivum_formato (str): 'csv', 'hxltm' or 'link-fonti'.
        """
        self.methodus = methodus
        self.objectivum_formato = objectivum_formato
        if methodus and methodus != 'help':
            # The link must follow the requested indicator; otherwise the
            # cache file would be named after one indicator while holding
            # the data of another.
            self.link_fonti = self._LINK_FONTI_FORMA.format(methodus)

    def imprimere(self, formatum: str = None) -> bool:
        """Print the result to stdout.

        For 'link-fonti' only the download link is printed. Otherwise the
        file prepared by praeparatio() (the HXLTM one for 'hxltm', else the
        extracted CSV) is re-emitted through csv.writer.

        Args:
            formatum (str, optional): accepted for interface compatibility;
                not used.

        Returns:
            bool: True once the output was written.
        """
        if self.objectivum_formato == 'link-fonti':
            print(self.link_fonti)
            return True

        fonti = self.temp_fonti_csv
        if self.objectivum_formato == 'hxltm':
            fonti = self.temp_fonti_hxltm

        with open(fonti, 'r', newline='',
                  encoding=self._CODEX_LECTIONI) as _fons:
            csv.writer(sys.stdout).writerows(csv.reader(_fons))
        return True

    def _via_temporaria(self, extensio: str) -> str:
        """Build the cache path for this class/indicator and an extension."""
        return '{0}/999999/0/{1}~{2}.{3}'.format(
            NUMERORDINATIO_BASIM, __class__.__name__, self.methodus, extensio
        )

    @staticmethod
    def _eligere_archivum_datorum(nomina: list) -> str:
        """Pick the data member among the names inside the zip container.

        Names containing 'meta' are ignored; among the rest, the last one
        starting with 'api_' (case-insensitive) wins.

        Raises:
            KeyError: no member matches (same exception type the zipfile
                lookup would raise for a missing member, with a clearer
                message).
        """
        electum = ''
        for nomen in nomina:
            minuscula = nomen.lower()
            if 'meta' in minuscula:
                continue
            if minuscula.startswith('api_'):
                electum = nomen
        if not electum:
            raise KeyError(
                'No API_* data file in zip; members: {0}'.format(
                    list(nomina)))
        return electum

    def _extrahere_csv(self, fonti_zip: str, objectivum_csv: str) -> None:
        """Extract the data member of fonti_zip and save it as objectivum_csv.
        """
        with zipfile.ZipFile(fonti_zip) as archivum:
            nomen = self._eligere_archivum_datorum(archivum.namelist())
            content = archivum.read(nomen)
        with open(objectivum_csv, 'wb') as f:
            f.write(content)

    def praeparatio(self) -> bool:
        """praeparātiō

        Download (unless already cached), extract and, for 'hxltm', convert
        the data so imprimere() can print it. Sets temp_fonti_csv and
        temp_fonti_hxltm. Nothing is done for 'link-fonti'.

        Trivia:
        - praeparātiō, s, f, Nom., https://en.wiktionary.org/wiki/praeparatio

        Returns:
            bool: True when the preparation finished.

        Raises:
            requests.HTTPError: the download answered with an error status.
            KeyError: the zip has no API_* data member.
        """
        if self.objectivum_formato == 'link-fonti':
            return True

        temp_fonti_zip = self._via_temporaria('zip')
        self.temp_fonti_csv = self._via_temporaria('csv')
        temp_fonti_csvnorm = self._via_temporaria('norm.csv')
        self.temp_fonti_hxltm = self._via_temporaria('tm.hxl.csv')

        if not exists(temp_fonti_zip):
            # Download to the local cache only if not already there
            r = requests.get(self.link_fonti, timeout=60)
            # Without this check an HTTP error page would be cached as if it
            # were the zip and break every later run.
            r.raise_for_status()
            with open(temp_fonti_zip, 'wb') as f:
                f.write(r.content)

        self._extrahere_csv(temp_fonti_zip, self.temp_fonti_csv)

        if self.objectivum_formato == 'hxltm':
            caput_initiali = ['Country Name', 'Country Code']
            self.de_csv_ad_csvnorm(
                self.temp_fonti_csv, temp_fonti_csvnorm, caput_initiali
            )
            self.de_csv_ad_hxltm(
                temp_fonti_csvnorm, self.temp_fonti_hxltm, caput_initiali
            )
        return True