999999999_521850.py (#43): --methodus-fonti=worldbank --methodus=SP.POP.TOTL draft
EticaAI · Jul 27, 2022 · abe9d4b · abe9d4b
1 parent fd554ed
commit abe9d4b
Showing 1 changed file with 227 additions and 53 deletions.
diff --git a/officina/999999999/0/999999999_521850.py b/officina/999999999/0/999999999_521850.py
@@ -28,25 +28,42 @@
 import sys
 import argparse
 import csv
-import re
+# import re
 from pathlib import Path
 from os.path import exists
 
-from functools import reduce
+# from functools import reduce
 from typing import (
     Any,
     # Dict,
     # List,
 )
+import zipfile
 
 from L999999999_0 import (
     # hxltm_carricato,
-    TabulaAdHXLTM
+    NUMERORDINATIO_BASIM,
+    # TabulaAdHXLTM
 )
 
-import yaml
+import requests
 
-import xml.etree.ElementTree as XMLElementTree
+try:
+    from openpyxl import (
+        load_workbook
+    )
+except ModuleNotFoundError:
+    # openpyxl not installed: ignored silently (no error handling yet)
+    pass
+try:
+    import xlrd
+except ModuleNotFoundError:
+    # xlrd not installed: ignored silently (no error handling yet)
+    pass
+
+# import yaml
+
+# import xml.etree.ElementTree as XMLElementTree
 
 STDIN = sys.stdin.buffer
 
@@ -66,15 +83,24 @@
 ------------------------------------------------------------------------------
                             EXEMPLŌRUM GRATIĀ
 ------------------------------------------------------------------------------
-    {0} --methodus=undata
+    {0} --methodus-fonti=undata
+
+    {0} --methodus-fonti=unhcr
 
-    {0} --methodus=unhcr
+    {0} --methodus-fonti=unochafts
 
-    {0} --methodus=unochafts
+    {0} --methodus-fonti=unwpf
 
-    {0} --methodus=unwpf
+    {0} --methodus-fonti=worldbank --methodus=SP.POP.TOTL
 
-    {0} --methodus=worldbank
+    {0} --methodus-fonti=worldbank --methodus=SP.POP.TOTL \
+--objectivum-formato=link-fonti
+
+    {0} --methodus-fonti=worldbank --methodus=SP.POP.TOTL \
+--objectivum-formato=csv
+
+    {0} --methodus-fonti=worldbank --methodus=SP.POP.TOTL \
+--objectivum-formato=hxltm
 
 ------------------------------------------------------------------------------
                             EXEMPLŌRUM GRATIĀ
@@ -151,30 +177,7 @@ def __init__(self):
         Constructs all the necessary attributes for the Cli object.
         """
 
-    def _quod_configuratio(self, archivum_configurationi: str = None) -> dict:
-        """_quod_configuratio
-
-        Args:
-            archivum_configurationi (str, optional):
-
-        Returns:
-            (dict):
-        """
-        archivae = ARCHIVUM_CONFIGURATIONI_DEFALLO
-        if archivum_configurationi is not None:
-            if not exists(archivum_configurationi):
-                raise FileNotFoundError(
-                    'archivum_configurationi {0}'.format(
-                        archivum_configurationi))
-            archivae.append(archivum_configurationi)
-
-        for item in archivae:
-            if exists(item):
-                with open(item, "r") as read_file:
-                    datum = yaml.safe_load(read_file)
-                    return datum
-
-    def make_args(self, hxl_output=True):
+    def make_args(self):
         # parser = argparse.ArgumentParser(description=DESCRIPTION)
         parser = argparse.ArgumentParser(
             prog="999999999_10263485",
@@ -190,9 +193,9 @@ def make_args(self, hxl_output=True):
         )
 
         parser.add_argument(
-            '--methodus',
-            help='Modo de operação.',
-            dest='methodus',
+            '--methodus-fonti',
+            help='External data source',
+            dest='methodus_fonti',
             nargs='?',
             choices=[
                 'undata',  # https://data.un.org/
@@ -206,6 +209,14 @@ def make_args(self, hxl_output=True):
             default='undata'
         )
 
+        parser.add_argument(
+            '--methodus',
+            help='Underlining method for the data source',
+            dest='methodus',
+            nargs='?',
+            # default=None
+            default='help'
+        )
         # objectīvum, n, s, nominativus,
         #                       https://en.wiktionary.org/wiki/objectivus#Latin
         # fōrmātō, n, s, dativus, https://en.wiktionary.org/wiki/formatus#Latin
@@ -216,11 +227,13 @@ def make_args(self, hxl_output=True):
             nargs='?',
             choices=[
                 'csv',
-                'tsv',
-                'hxl_csv',
-                'hxl_tsv',
-                'hxltm_csv',
-                'hxltm_tsv',
+                'hxltm',
+                'link-fonti',
+                # 'tsv',
+                # 'hxl_csv',
+                # 'hxl_tsv',
+                # 'hxltm_csv',
+                # 'hxltm_tsv',
             ],
             # required=True
             default='csv'
@@ -276,31 +289,192 @@ def execute_cli(self, pyargs, stdin=STDIN, stdout=sys.stdout,
 
         # hf = CliMain(self.pyargs.infile, self.pyargs.outfile)
 
-        if pyargs.methodus == 'undata':
-            print(DATA_SCRAPPING_HELP['UNDATA'])
+        if pyargs.methodus_fonti == 'undata':
+            if pyargs.methodus == 'help':
+                print(DATA_SCRAPPING_HELP['UNDATA'])
+                return self.EXIT_OK
+            raise NotImplementedError
             return self.EXIT_OK
 
-        if pyargs.methodus == 'unhcr':
-            print(DATA_SCRAPPING_HELP['UNHCR'])
+        if pyargs.methodus_fonti == 'unhcr':
+            if pyargs.methodus == 'help':
+                print(DATA_SCRAPPING_HELP['UNHCR'])
             return self.EXIT_OK
 
-
-        if pyargs.methodus == 'unochafts':
-            print(DATA_SCRAPPING_HELP['UNOCHAFTS'])
+        if pyargs.methodus_fonti == 'unochafts':
+            if pyargs.methodus == 'help':
+                print(DATA_SCRAPPING_HELP['UNOCHAFTS'])
+                return self.EXIT_OK
+            raise NotImplementedError
             return self.EXIT_OK
 
-        if pyargs.methodus == 'unwpf':
-            print(DATA_SCRAPPING_HELP['UNWPF'])
+        if pyargs.methodus_fonti == 'unwpf':
+            if pyargs.methodus == 'help':
+                print(DATA_SCRAPPING_HELP['UNWPF'])
+            raise NotImplementedError
             return self.EXIT_OK
 
-        if pyargs.methodus == 'worldbank':
-            print(DATA_SCRAPPING_HELP['WORLDBANK'])
+        if pyargs.methodus_fonti == 'worldbank':
+            # print(DATA_SCRAPPING_HELP['WORLDBANK'])
+            ds_worldbank = DataScrappingWorldbank(
+                pyargs.methodus, pyargs.objectivum_formato)
+            ds_worldbank.praeparatio()
+            ds_worldbank.imprimere()
             return self.EXIT_OK
 
         print('Unknow option.')
         return self.EXIT_ERROR
 
 
+class DataScrapping:
+
+    def _hxlize_dummy(self, caput: list):
+        resultatum = []
+        for res in caput:
+            if not res:
+                resultatum.append('')
+            else:
+                resultatum.append(
+                    '#meta+{0}'.format(res.lower().strip().replace(' ',
+                                       '').replace('-', '_'))
+                )
+        return resultatum
+
+    def de_csv_ad_csvnorm(self, fonti: str, objetivum: str, caput_initiali: list):
+        # print("TODO de_csv_ad_csvnorm")
+        with open(objetivum, 'w') as _objetivum:
+            with open(fonti, 'r') as _fons:
+                _csv_reader = csv.reader(_fons)
+                _csv_writer = csv.writer(_objetivum)
+                started = False
+                for linea in _csv_reader:
+                    # print(linea)
+                    if not started:
+                        if linea and linea[0].strip() in caput_initiali:
+                            started = True
+                        else:
+                            continue
+                    _csv_writer.writerow(linea)
+
+        # print("TODO")
+    def de_csv_ad_hxltm(self, fonti: str, objetivum: str, caput_initiali: list):
+        # print("TODO de_csv_ad_csvnorm")
+        with open(objetivum, 'w') as _objetivum:
+            with open(fonti, 'r') as _fons:
+                _csv_reader = csv.reader(_fons)
+                _csv_writer = csv.writer(_objetivum)
+                started = False
+                for linea in _csv_reader:
+                    # print(linea)
+                    if not started:
+                        if linea and linea[0].strip() in caput_initiali:
+                            started = True
+                            # @TODO remove this draft part
+                            _csv_writer.writerow(self._hxlize_dummy(linea))
+                            continue
+                        else:
+                            continue
+                    _csv_writer.writerow(linea)
+
+        # print("TODO")
+
+
+class DataScrappingWorldbank(DataScrapping):
+
+    methodus: str = 'SP.POP.TOTL'
+    objectivum_formato: str = 'csv'
+    # link_fonti: str = 'https://api.worldbank.org/v2/en/indicator/SP.POP.TOTL?downloadformat=excel'
+    link_fonti: str = 'https://api.worldbank.org/v2/en/indicator/SP.POP.TOTL?downloadformat=csv'
+    temp_fonti_csv: str = ''
+    temp_fonti_hxltm: str = ''
+
+    def __init__(self, methodus: str, objectivum_formato: str):
+
+        self.methodus = methodus
+        self.objectivum_formato = objectivum_formato
+
+        # print('oioioi', self.dictionaria_codex )
+
+    def imprimere(self, formatum: str = None) -> list:
+
+        if self.objectivum_formato == 'link-fonti':
+            print(self.link_fonti)
+            return True
+
+        fonti = self.temp_fonti_csv
+        if self.objectivum_formato == 'hxltm':
+            fonti = self.temp_fonti_hxltm
+
+        with open(fonti, 'r') as _fons:
+            _csv_reader = csv.reader(_fons)
+            _csv_writer = csv.writer(sys.stdout)
+            for linea in _csv_reader:
+                _csv_writer.writerow(linea)
+
+    def praeparatio(self):
+        """praeparātiō
+
+        Trivia:
+        - praeparātiō, s, f, Nom., https://en.wiktionary.org/wiki/praeparatio
+        """
+        if self.objectivum_formato == 'link-fonti':
+            # print(self.link_fonti)
+            return True
+        # self.temp_fonti = '{0}/999999/0/{1}~{2}.xls'.format(
+        #     NUMERORDINATIO_BASIM, __class__.__name__, self.methodus
+        # )
+        temp_fonti_zip = '{0}/999999/0/{1}~{2}.zip'.format(
+            NUMERORDINATIO_BASIM, __class__.__name__, self.methodus
+        )
+        self.temp_fonti_csv = '{0}/999999/0/{1}~{2}.csv'.format(
+            NUMERORDINATIO_BASIM, __class__.__name__, self.methodus
+        )
+        temp_fonti_csvnorm = '{0}/999999/0/{1}~{2}.norm.csv'.format(
+            NUMERORDINATIO_BASIM, __class__.__name__, self.methodus
+        )
+        self.temp_fonti_hxltm = '{0}/999999/0/{1}~{2}.tm.hxl.csv'.format(
+            NUMERORDINATIO_BASIM, __class__.__name__, self.methodus
+        )
+
+        if not exists(temp_fonti_zip):
+            # Download to the local cache if not already there
+            r = requests.get(self.link_fonti)
+            with open(temp_fonti_zip, 'wb') as f:
+                f.write(r.content)
+
+        # zip file handler
+        zip = zipfile.ZipFile(temp_fonti_zip)
+        data_file_main = ''
+        for _res in zip.namelist():
+            if _res.lower().find('meta') > -1:
+                continue
+            if _res.lower().startswith('api_'):
+                data_file_main = _res
+        # list available files in the container
+        # print(zip.namelist())
+
+        # extract a specific file from the zip container
+        f = zip.open(data_file_main)
+
+        # save the extracted file
+        content = f.read()
+        f = open(self.temp_fonti_csv, 'wb')
+        f.write(content)
+        f.close()
+
+        if self.objectivum_formato == 'hxltm':
+            # print(self.link_fonti)
+            self.de_csv_ad_csvnorm(
+                self.temp_fonti_csv, temp_fonti_csvnorm, [
+                    'Country Name', 'Country Code'
+                ]
+            )
+            return self.de_csv_ad_hxltm(
+                temp_fonti_csvnorm, self.temp_fonti_hxltm, [
+                    'Country Name', 'Country Code'
+                ]
+            )
+
 
 if __name__ == "__main__":