class Adm0CodexLocali:
    """Translate a source value into a local numeric code via a JSON index.

    The index file ``i1603_16_1_0.index.json`` is loaded once, at
    construction time, from ``NUMERORDINATIO_BASIM + '/999999/0/'``.
    It has to be generated beforehand ("warm up") with:

        ./999999999/0/999999999_7200235.py
            --methodus=index_praeparationi 1603_16_1_0
            --index-nomini=i1603_16_1_0 --index-ad-columnam='ix_unm49'

    NOTE(review): given ``--index-ad-columnam='ix_unm49'`` the index values
    are presumably UN M49 numeric codes -- confirm against the index builder.

    Raises:
        NotFoundErr: if the index file does not exist yet.
    """

    # Parsed content of the index file (key -> numeric code). None at class
    # level; every instance sets its own value in __init__.
    i1603_16_1_0: dict = None

    def __init__(
        self
    ):
        _path = NUMERORDINATIO_BASIM + '/999999/0/i1603_16_1_0.index.json'
        if not exists(_path):
            raise NotFoundErr(
                "Warm up required. Use ./999999999/0/999999999_7200235.py "
                " --methodus=index_praeparationi 1603_16_1_0 "
                "--index-nomini=i1603_16_1_0 --index-ad-columnam='ix_unm49'")

        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(_path, 'r', encoding='utf-8') as f:
            self.i1603_16_1_0 = json.load(f)

    def quod(self, res: str) -> "str | None":
        """Return the numeric code for ``res`` as a string, or None.

        The stored value is normalized through ``int`` so leading zeros are
        dropped (``'076'`` becomes ``'76'``).

        Args:
            res (str): value to look up; empty/None never matches.

        Returns:
            str | None: normalized code, or None when ``res`` is empty, is
            not in the index, or is mapped to a value that is not an integer
            (for example an empty string).
        """
        if not res or res not in self.i1603_16_1_0:
            return None
        try:
            return str(int(self.i1603_16_1_0[res]))
        except (TypeError, ValueError):
            # An entry without a usable number is treated like a missing
            # entry, so callers can apply their own fallback instead of
            # aborting the whole conversion.
            return None
def qhxl_select(
        caput: list, query: str,
        unicum: bool = False, strictum: bool = False):
    """Select HXL hashtags from a header without requiring an exact match.

    If ``query`` is literally one of the items of ``caput`` it is returned
    as is. Otherwise both the query and every header item are split into
    their ``+`` separated parts (the ``#`` is ignored) and a header item is
    selected when it contains every part of the query, in any order.

    Args:
        caput (list): HXL hashtags header
        query (str): full hashtag or only some of its parts,
            e.g. ``'ix_unm49'`` or ``'#item+rem'``
        unicum (bool): return a single item instead of a list
        strictum (bool): require at least one match and, together with
            ``unicum``, at most one match

    Returns:
        list | str | None: with ``unicum=False`` a list with 0 or more
        header items. With ``unicum=True`` the first matching header item,
        or None when nothing matched and ``strictum`` is not set.

    Raises:
        ValueError: ``strictum`` is set and nothing matched, or ``strictum``
            and ``unicum`` are set and more than one item matched.
    """
    if query in caput:
        # Exact match
        return query if unicum else [query]

    query_parts = query.replace('#', '').strip('+').split('+')

    result_filtered = []
    for res in caput:
        _res_parts = set(res.replace('#', '').strip('+').split('+'))
        # all() stops at the first query part missing from this header item
        if all(_q in _res_parts for _q in query_parts):
            result_filtered.append(res)

    if strictum and not result_filtered:
        raise ValueError('<{0}> not found in <{1}>'.format(query, caput))

    if not unicum:
        return result_filtered

    if strictum and len(result_filtered) > 1:
        raise ValueError('Non unicum <{0}>: <{1}>; caput <{2}>'.format(
            query, result_filtered, caput))

    # Non-strict lookups without any match yield None instead of IndexError
    return result_filtered[0] if result_filtered else None