Skip to content

Commit

Permalink
999999999_10263485.py (#42): draft of XML iterator
Browse files Browse the repository at this point in the history
  • Loading branch information
fititnt committed May 16, 2022
1 parent 63e299e commit fca3841
Show file tree
Hide file tree
Showing 3 changed files with 188 additions and 25 deletions.
153 changes: 139 additions & 14 deletions officinam/999999999/0/999999999_10263485.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
import csv
import re

import xml.etree.ElementTree as XMLElementTree

STDIN = sys.stdin.buffer

DESCRIPTION = """
Expand All @@ -45,6 +47,20 @@
é o Cadastro Nacional de Estabelecimentos de Saúde (CNES), (...)"
""".format(__file__)

__EPILOGUM__ = """
------------------------------------------------------------------------------
EXEMPLŌRUM GRATIĀ
------------------------------------------------------------------------------
{0} --methodus=datasus-xmlcnae 999999/0/xmlCNES.xml
cat 999999/0/xmlCNES.xml | {0} --methodus=datasus-xmlcnae
@TODO: fazer funcionar com stream de XML (não apenas por arquivo)
------------------------------------------------------------------------------
EXEMPLŌRUM GRATIĀ
------------------------------------------------------------------------------
""".format(__file__)

LIKELY_NUMERIC = [
'#item+conceptum+codicem',
'#status+conceptum',
Expand All @@ -56,52 +72,105 @@


# Command-line front end: builds the argument parser and dispatches the
# selected methodus to CliMain.
# NOTE(review): this span is taken from a diff view that shows no +/- markers,
# so lines removed by the commit and their replacements appear side by side.
# Confirm against the committed file before relying on any single line.
class Cli:

# Exit status values. execute_cli() returns EXIT_ERROR when no methodus
# branch matched; EXIT_SYNTAX is not referenced in the visible code.
EXIT_OK = 0
EXIT_ERROR = 1
EXIT_SYNTAX = 2

def __init__(self):
"""
Constructs all the necessary attributes for the Cli object.
"""

# Build the argparse parser and return the parsed arguments.
# The hxl_output parameter is not used anywhere in the visible body.
def make_args(self, hxl_output=True):
parser = argparse.ArgumentParser(description=DESCRIPTION)
# parser = argparse.ArgumentParser(description=DESCRIPTION)
# RawDescriptionHelpFormatter keeps the line breaks of DESCRIPTION and of the
# epilog when --help is printed.
parser = argparse.ArgumentParser(
prog="999999999_10263485",
description=DESCRIPTION,
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=__EPILOGUM__
)

# Optional positional input path (nargs='?' allows it to be omitted).
parser.add_argument(
'infile',
help='Input file',
nargs='?'
)

# NOTE(review): the 'outfile' lines directly below look like the removed
# argument; '--methodus' appears to be its replacement in this commit.
parser.add_argument(
'outfile',
help='Output file',
nargs='?'
'--methodus',
help='Modo de operação.',
dest='methodus',
nargs='?',
# Only one methodus is enabled so far; the other names are commented out.
choices=[
'datasus-xmlcnae',
# 'data-apothecae',
# 'hxltm-explanationi',
# 'opus-temporibus',
# 'status-quo',
# 'deprecatum-dictionaria-numerordinatio'
],
# required=True
# The single enabled choice is also the default, so the flag may be omitted.
default='datasus-xmlcnae'
)

# parser.add_argument(
# 'outfile',
# help='Output file',
# nargs='?'
# )

return parser.parse_args()

# Choose the input source, then run the selected methodus.
# Returns the value of the methodus handler, or EXIT_ERROR when no branch
# matched. The stdout and stderr parameters are not used in the visible body.
def execute_cli(self, pyargs, stdin=STDIN, stdout=sys.stdout,
stderr=sys.stderr):
self.pyargs = pyargs
# self.pyargs = pyargs

_infile = None
_stdin = None

# A TTY on stdin means nothing was piped in, so the input comes from the
# positional infile argument; otherwise the piped stream is used.
if stdin.isatty():
# print("ERROR. Please pipe data in. \nExample:\n"
# " cat data.txt | {0} --actionem-quod-sparql\n"
# " printf \"Q1065\\nQ82151\\n\" | {0} --actionem-quod-sparql"
# "".format(__file__))
print('non stdin')
_infile = pyargs.infile
# return self.EXIT_ERROR
else:
print('est stdin')
_stdin = stdin

# Debug output: these prints go to standard output.
print(pyargs.methodus)
print(pyargs)

# NOTE(review): HotfixCSV and pyargs.outfile look like lines removed by this
# commit (the class is renamed to CliMain and 'outfile' is commented out).
hf = HotfixCSV(self.pyargs.infile, self.pyargs.outfile)
hf.execute()
# NOTE(review): this loop iterates sys.stdin rather than the `stdin` parameter
# and reads it to the end before CliMain is built, so the piped stream is
# presumably already consumed when the handler runs — confirm intent.
# `codicem` is assigned but not used in the visible code.
if _stdin is not None:
for line in sys.stdin:
print('oi')
codicem = line.replace('\n', ' ').replace('\r', '')

# def output(self, output_collectiom):
# for item in output_collectiom:
# print(item)
# hf = CliMain(self.pyargs.infile, self.pyargs.outfile)
climain = CliMain(infile=_infile, stdin=_stdin)
if pyargs.methodus == 'datasus-xmlcnae':
return climain.execute_ex_datasus_xmlcnae()

# return self.EXIT_OK
# Reached only when methodus matched none of the branches above.
print('Unknow option.')
return self.EXIT_ERROR


class HotfixCSV:
class CliMain:
"""Remove .0 at the end of CSVs from data exported from XLSX and likely
to have numeric values (and trigger weird bugs)
"""

def __init__(self, infile, outfile):
def __init__(self, infile: str = None, stdin=None):
"""
Constructs all the necessary attributes for the Cli object.
"""
self.infile = infile
self.outfile = outfile
self.stdin = stdin

# self.outfile = outfile
self.header = []
self.header_index_fix = []

Expand All @@ -121,6 +190,62 @@ def process_row(self, row: list) -> list:
row[index_fix] = re.sub('\.0$', '', row[index_fix].strip())
return row

def execute_ex_datasus_xmlcnae(self):
    """Stream an XML document and print each element's attribute values.

    The input is ``self.infile`` (a path) when it is set, otherwise
    ``self.stdin`` (a file object). For every element, once it has been
    fully parsed, the list of its attribute values is printed in document
    order; elements without attributes print an empty list.

    Draft status: collecting a header row from the attribute names and
    emitting real CSV are still to be done (see the TODO printed below).

    Returns:
        Cli.EXIT_OK after the whole document was read, or Cli.EXIT_ERROR
        when neither an input file nor a stream is available.

    Raises:
        xml.etree.ElementTree.ParseError: if the input is not well-formed.
        OSError: if ``self.infile`` cannot be opened.
    """
    print('@TODO copy logic from https://github.com/EticaAI/hxltm/blob/main/bin/hxltmdexml.py')

    fontem = self.infile if self.infile is not None else self.stdin
    if fontem is None:
        # Without this guard iterparse() fails with an opaque error when the
        # CLI is run interactively without a file argument.
        print('No input: give an XML file path or pipe XML on stdin.',
              file=sys.stderr)
        return Cli.EXIT_ERROR

    # Only 'end' events are needed: an element's attributes are complete by
    # then, and the former 'start' branch did nothing.
    for _eventum, elem in XMLElementTree.iterparse(
            source=fontem, events=('end',)):
        print(list(elem.attrib.values()))
        # Release the processed element; otherwise iterparse keeps the whole
        # tree in memory, which matters for large XML dumps.
        elem.clear()

    return Cli.EXIT_OK

def execute(self):
with open(self.infile, newline='') as infilecsv:
with open(self.outfile, 'w', newline='') as outfilecsv:
Expand Down
48 changes: 45 additions & 3 deletions officinam/999999999/999999999.lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# - pip3 install s3cmd
# - ssconvert (sudo apt install gnumeric)
# - wget
# - unzip
#
# BUGS: ---
# NOTES: ---
Expand Down Expand Up @@ -277,21 +278,62 @@ archivum_speculo_ex_ftp() {
# fontī, m, s, dativus, https://en.wiktionary.org/wiki/fons#Latin

objectivum_basi="${ROOTDIR}/999999/0/0"
objectivum_archivum="${ROOTDIR}/999999/0/0/${archivum_fonti_ftp}"
objectivum_archivum="${ROOTDIR}/999999/0/0/${archivum_fonti_ftp/ftp:\/\//''}"

blue=$(tput setaf 4)
normal=$(tput sgr0)
printf "\t%40s\n" "${blue}${FUNCNAME[0]} [$archivum_fonti_ftp] \
--> [$objectivum_archivum]${normal}"

echo "${FUNCNAME[0]} ... [$archivum_fonti_ftp] --> [$objectivum_archivum]"
# echo "${FUNCNAME[0]} ... [$archivum_fonti_ftp] --> [$objectivum_archivum]"
# exit 0

if [ ! -d "${objectivum_basi}" ]; then
mkdir "${objectivum_basi}"
fi

# cd "${objectivum_basi}"
set -x
# set -x
wget --directory-prefix "${objectivum_basi}" \
--mirror "$archivum_fonti_ftp"
}

#######################################
# Extract file from zip archive on disk to target path
#
# Globals:
# ROOTDIR
# Arguments:
# archivum_fonti_ex_zip
# archivum_fonti_ad_zip
# archivum_objectivo
# Outputs:
# File(s) on disk
#######################################
archivum_unzip() {
  # Extract one member of a zip archive to a target file.
  #
  # Arguments:
  #   $1  archivum_fonti_ex_zip  path of the zip archive on disk
  #   $2  archivum_fonti_ad_zip  name of the member inside the archive
  #   $3  archivum_objectivo     path of the file to write
  # Returns:
  #   0 on success, 2 on wrong usage, 1 if the archive is missing or the
  #   extraction fails (no partial target file is left behind).
  if [ "$#" -lt 3 ]; then
    echo "${FUNCNAME[0]}: usage: ${FUNCNAME[0]} <zip> <member> <target>" >&2
    return 2
  fi

  archivum_fonti_ex_zip="$1"
  archivum_fonti_ad_zip="$2"
  archivum_objectivo="$3"

  # archīvum, n, s, nominativus, https://en.wiktionary.org/wiki/archivum
  # fontī, m, s, dativus, https://en.wiktionary.org/wiki/fons#Latin
  # objectīvō, n, s, dativus

  # The copy-pasted objectivum_basi / objectivum_archivum assignments were
  # dropped: they read archivum_fonti_ftp, which is not an argument of this
  # function, and only overwrote globals without being used here.

  if [ ! -f "${archivum_fonti_ex_zip}" ]; then
    echo "${FUNCNAME[0]}: archive not found: ${archivum_fonti_ex_zip}" >&2
    return 1
  fi

  blue=$(tput setaf 4)
  normal=$(tput sgr0)
  printf "\t%40s\n" "${blue}${FUNCNAME[0]} [${archivum_fonti_ex_zip}] / [${archivum_fonti_ad_zip}] --> [${archivum_objectivo}]${normal}"

  # unzip -p streams the member to stdout. The redirection creates the target
  # before unzip runs, so a failed extraction must remove the empty or
  # truncated file instead of leaving it for later steps to pick up.
  if ! unzip -p "${archivum_fonti_ex_zip}" \
    "${archivum_fonti_ad_zip}" \
    >"${archivum_objectivo}"; then
    rm -f -- "${archivum_objectivo}"
    echo "${FUNCNAME[0]}: could not extract [${archivum_fonti_ad_zip}] from [${archivum_fonti_ex_zip}]" >&2
    return 1
  fi
}

#######################################
# What relative path from an numerordinatio string?
#
Expand Down
12 changes: 4 additions & 8 deletions officinam/999999999/999999_1679.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,12 @@ ROOTDIR="$(pwd)"

# wget --timestamping ftp://ftp.datasus.gov.br/cnes/CNESBRASIL.ZIP --output-document=999999/0/CNESBRASIL.ZIP

archivum_speculo_ex_ftp "ftp://ftp.datasus.gov.br/cnes/CNESBRASIL.ZIP"
# archivum_speculo_ex_ftp "ftp://ftp.datasus.gov.br/cnes/CNESBRASIL.ZIP"

exit 0

if [ ! -d 999999/0/10263485 ]; then mkdir 999999/0/10263485; fi
cd 999999/0/10263485
wget --mirror ftp://ftp.datasus.gov.br/cnes/CNESBRASIL.ZIP
archivum_unzip "999999/0/0/ftp.datasus.gov.br/cnes/CNESBRASIL.ZIP" "xmlCNES.xml" "999999/0/xmlCNES.xml"

# result:
# 999999/0/10263485/ftp.datasus.gov.br/cnes/CNESBRASIL.ZIP
# ./999999999/0/999999999_10263485.py "999999/0/xmlCNES.xml" "999999/0/xmlCNES.tm.hxl.csv"
./999999999/0/999999999_10263485.py "999999/0/xmlCNES.xml" "999999/0/xmlCNES.tm.hxl.csv"

exit 0
#### Manual action, TEST locally, one per time, END ----------------------------
Expand Down

0 comments on commit fca3841

Please sign in to comment.